typogrowth 0.9.7 → 0.9.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
- metadata.gz: 53a02d4e8ed6d038744689a72340501f7320edbd
4
- data.tar.gz: 13da2fe67adcd6bfb6794f97371badaf639bf16a
3
+ metadata.gz: d51d831d3067a678c4d7f440f08532b0aadcf108
4
+ data.tar.gz: 296addd6996aafb254597f7c81092edd1fa75241
5
5
  !binary "U0hBNTEy":
6
- metadata.gz: 1843bc3d26f9a29e4c5ae603a1778ead2534007a0343723445c114440551198999db0420d7abfc084c7ea0e5d60d25bec38437a112a26cda8f1e26332e46926a
7
- data.tar.gz: 897f8982e04275fc5529ba666c1317493d43f3e9a8a5159a50aad8d07b9aad4d1d209402c6261800584fbf44ac51448364fb78372cc572f2cfdc3b8536acc5ef
6
+ metadata.gz: 2e385ed3b085ebdf1a50b67003f42e5184ca7a7f047272aac1391ecc72a082cef9b5a462a098cce8bb283a0c0a82b8986552c43779b4ebcfb5231b4ec43bf4c2
7
+ data.tar.gz: babea8b38641f4da8ba5f90907f9b6aba0d86316a2ba2dc8239a5caecf98fd2b51dc87b9ec52483e27c3137f5724e65b020ab945f432ff952a3458fa6f9d9cc3
@@ -4,6 +4,10 @@ Given(/^the input string is "(.*?)"$/) do |str|
4
4
  @content = str
5
5
  end
6
6
 
7
+ Given(/^the input string is$/) do |str|
8
+ @content = str
9
+ end
10
+
7
11
  When(/^input string is processed with Typogrowl’s typography parser$/) do
8
12
  @content.gsub! /\\+"/, '"'
9
13
  @typo = Typogrowth.parse @content
@@ -14,6 +18,11 @@ When(/^input string is processed with Typogrowl’s typography parser with lang
14
18
  @typo = Typogrowth.parse @content, lang: lang
15
19
  end
16
20
 
21
+ When(/^input string is processed with Typogrowl’s typography parser with section "(.*?)"$/) do |sect|
22
+ @content.gsub! /\\+"/, '"'
23
+ @typo = @content.typo sections: sect.to_sym
24
+ end
25
+
17
26
  When(/^input string is modified inplace with typo!$/) do
18
27
  @typoed = @content.dup
19
28
  @typoed.typo!
@@ -31,12 +40,16 @@ Then(/^the typoed result should equal to "(.*?)"$/) do |str|
31
40
  @typo.should == str
32
41
  end
33
42
 
43
+ Then(/^the typoed result should equal to$/) do |str|
44
+ @typo.should == str
45
+ end
46
+
34
47
  Then(/^the call to string’s typo should equal to "(.*?)"$/) do |str|
35
48
  @content.typo.should == str
36
49
  end
37
50
 
38
51
  Then(/^the call to string’s typo with lang "(.*?)" should equal to "(.*?)"$/) do |lang, str|
39
- @content.typo('ru').should == str
52
+ @content.typo(lang: 'ru').should == str
40
53
  end
41
54
 
42
55
  Then(/^typoed result should equal to "(.*?)"$/) do |str|
@@ -76,7 +76,8 @@ Feature: Text is to be typographed (spacing and pubctuation are to be sanitized)
76
76
 
77
77
  Examples:
78
78
  | input | output |
79
- | "<p><img src="http://mudasobwa.ru/i/self.jpg">Here: http://wikipedia.ru</p>" | "<p><img src="http://mudasobwa.ru/i/self.jpg">Here: http://wikipedia.ru</p>" |
79
+ | "<p><img src="http://mudasobwa.ru/i/self.jpg">Here: http://wikipedia.ru</p>" | "<p><img src="http://mudasobwa.ru/i/self.jpg">Here: http://wikipedia.ru</p>" |
80
+ | "<p>http://mudasobwa.ru/i/self.jpg With caption<br/> <small><a href='http://wikipedia.ru'>Wiki</a></small> </p>" | "<p>http://mudasobwa.ru/i/self.jpg With caption<br/> <small><a href='http://wikipedia.ru'>Wiki</a></small> </p>" |
80
81
 
81
82
  Scenario Outline: Language recognition
82
83
  Given the input string is <input>
@@ -98,3 +99,41 @@ Feature: Text is to be typographed (spacing and pubctuation are to be sanitized)
98
99
  | "Here 'you' go." | "Here “you” go." |
99
100
  | "Тут 'русский' язык." | "Тут «русский» язык." |
100
101
 
102
+ Scenario Outline: Section pick-up
103
+ Given the input string is <input>
104
+ When input string is processed with Typogrowl’s typography parser with section "quotes"
105
+ Then the typoed result should equal to <output>
106
+
107
+ Examples:
108
+ | input | output |
109
+ | "Here 'you' - go." | "Here “you” - go." |
110
+ | "Тут 'русский' --- язык." | "Тут «русский» --- язык." |
111
+
112
+ Scenario Outline: Predefined shadows
113
+ Given the input string is <input>
114
+ When input string is processed with Typogrowl’s typography parser
115
+ Then the typoed result should equal to <output>
116
+
117
+ Examples:
118
+ | input | output |
119
+ | "This is λsystem.dllλ file." | "This is λsystem.dllλ file." |
120
+ | "This is ✓8:35✓ time." | "This is ✓8:35✓ time." |
121
+
122
+ Scenario: Inplace tags
123
+ Given the input string is
124
+ """
125
+ http://qipowl.herokuapp.com/images/owl.png
126
+
127
+ ☞ Video embedded:
128
+
129
+ http://www.youtube.com/watch?v=KFKxlYNfT_o
130
+ """
131
+ When input string is processed with Typogrowl’s typography parser
132
+ Then the typoed result should equal to
133
+ """
134
+ http://qipowl.herokuapp.com/images/owl.png
135
+
136
+ ☞ Video embedded:
137
+
138
+ http://www.youtube.com/watch?v=KFKxlYNfT_o
139
+ """
@@ -0,0 +1,3 @@
1
+ :grip :
2
+ - 'λ'
3
+ - '✓'
@@ -1,4 +1,4 @@
1
- :quotes :
1
+ :sequence :
2
2
  :punctuation :
3
3
  :re : '(?<quote>''|"|\))\s*(?<punct>[.,!?]+)'
4
4
  :default :
@@ -6,6 +6,7 @@
6
6
  :ru :
7
7
  - '\k<quote>\k<punct>'
8
8
 
9
+ :quotes :
9
10
  # That's a 6.3" man, he sees sunsets at 10°20'30" E.
10
11
  # ⇑
11
12
  :inch :
@@ -78,7 +79,6 @@
78
79
  :us :
79
80
  - '’'
80
81
  - '”'
81
-
82
82
  # That's a 6.3" man, he sees sunsets at 10°20'30" E.
83
83
  # ⇑
84
84
  :apostrophe_post :
@@ -113,12 +113,24 @@
113
113
  - '”'
114
114
 
115
115
  :punctuation :
116
+ :bloody_quote_en :
117
+ :re : '(“)(?:\s+)'
118
+ :default :
119
+ - '\1'
120
+ :ru :
121
+ - '\1 '
122
+ :bloody_quote_ru :
123
+ :re : '(?:\s+)(“)'
124
+ :default :
125
+ - ' \1'
126
+ :ru :
127
+ - '\1'
116
128
  :opening_orphan :
117
- :re : '([(¿¡§#№]|\p{Sc})(?:\s+)'
129
+ :re : '([(¿¡§#№‘„«]|\p{Sc})(?:\s+)'
118
130
  :default :
119
131
  - '\1'
120
132
  :closing_orphan :
121
- :re : '(?:\s+)([.,:;!?)])'
133
+ :re : '(?:\s+)([.,:;!?)”’»])'
122
134
  :default :
123
135
  - '\1'
124
136
  :closing_clamped :
@@ -134,7 +146,7 @@
134
146
  :default :
135
147
  - '…'
136
148
  :mdash :
137
- :re : '\s*(?<dash> - |--|–|—)\s*'
149
+ :re : '\p{Space}*(?<dash>\s-\s|--|–|—)\s*'
138
150
  :default :
139
151
  - '—'
140
152
  :ru :
@@ -153,6 +165,6 @@
153
165
  :default :
154
166
  - ' \1'
155
167
  :extra_spaces :
156
- :re : '\s+'
168
+ :re : ' +'
157
169
  :default :
158
170
  - ' '
@@ -33,14 +33,6 @@ module Typogrowth
33
33
  class Parser
34
34
  attr_reader :yaml, :shadows
35
35
 
36
- def self.safe_delimiters str
37
- delimiters = ['❮', '❯']
38
- loop do
39
- break delimiters unless str.match(/#{delimiters.join('|')}/)
40
- delimiters.map! {|d| d*2}
41
- end
42
- end
43
-
44
36
  #
45
37
  # Recursively merges the initial settings with custom.
46
38
  #
@@ -82,14 +74,15 @@ module Typogrowth
82
74
  # @param str [String] the string to be typographyed inplace
83
75
  # @param lang the language to use rules for
84
76
  #
85
- def parse str, lang: :default, shadows: []
77
+ def parse str, lang: :default, shadows: [], sections: nil
86
78
  lang = lang.to_sym
87
- delims = Parser.safe_delimiters str
79
+ delims = str.safe_delimiters
88
80
  str.split(/\R{2,}/).map { |para|
89
- @shadows.concat([*shadows]).uniq.each { |re|
81
+ [*shadows].concat(@shadows).uniq.each { |re|
90
82
  para.gsub!(re) { |m| "#{delims.first}#{Base64.encode64 m}#{delims.last}" }
91
83
  }
92
84
  @yaml.each { |key, values|
85
+ next if sections && ![*sections].include?(key)
93
86
  values.each { |k, v|
94
87
  if !!v[:re]
95
88
  v[lang] = v[:default] if (!v[lang] || v[lang].size.zero?)
@@ -128,12 +121,25 @@ module Typogrowth
128
121
  end
129
122
 
130
123
  def is_ru? str, shadows: []
131
- clean = @shadows.concat([*shadows]).uniq.inject(str) { |memo, re|
124
+ clean = [*shadows].concat(@shadows).uniq.inject(str) { |memo, re|
132
125
  memo.gsub(re, '')
133
126
  }
134
127
  clean.scan(/[А-Яа-я]/).size > clean.length / 3
135
128
  end
136
129
 
130
+ def defuse str, elements, shadows: []
131
+ delims = str.safe_delimiters
132
+ s = str.dup
133
+ [*shadows].concat(@shadows).uniq.each { |re|
134
+ s.gsub!(re) { |m| "#{delims.first}#{Base64.encode64 m}#{delims.last}" }
135
+ }
136
+ # s.gsub(Regexp.union(elements), ' \1 ')
137
+ s.gsub(/(#{elements.map {|e| Regexp.escape e}.join('|')})/, ' \1 ')
138
+ .gsub(/#{delims.first}(.*?)#{delims.last}/m) { |m|
139
+ Base64.decode64(m).force_encoding('UTF-8')
140
+ }
141
+ end
142
+
137
143
  def add_shadows re
138
144
  @shadows.concat [*re]
139
145
  end
@@ -143,13 +149,13 @@ module Typogrowth
143
149
  end
144
150
 
145
151
  # Out-of-place version of `String` typographing. See #parse!
146
- def self.parse str, lang: :default, shadows: []
147
- Parser.new.parse str, lang: lang, shadows: shadows
152
+ def self.parse str, lang: :default, shadows: [], sections: nil
153
+ Parser.new.parse str, lang: lang, shadows: shadows, sections: sections
148
154
  end
149
155
 
150
156
  # In-place version of `String` typographing: replaces the content of `str` with the parsed result. See #parse
151
- def self.parse! str, lang: :default, shadows: []
152
- str.replace self.parse str, lang: lang, shadows: shadows
157
+ def self.parse! str, lang: :default, shadows: [], sections: nil
158
+ str.replace self.parse str, lang: lang, shadows: shadows, sections: sections
153
159
  end
154
160
 
155
161
  # Out-of-place version of `String` typographing. See #parse!
@@ -157,30 +163,47 @@ module Typogrowth
157
163
  @@instance.is_ru? str, shadows: shadows
158
164
  end
159
165
 
166
+ # Returns a copy of `str` with each of `elements` surrounded by spaces; fragments matching shadows are left untouched. See #defuse
167
+ def self.defuse str, elements, shadows: []
168
+ Parser.new.defuse str, elements, shadows: shadows
169
+ end
170
+
160
171
  DEFAULT_SET = 'typogrowth'
161
- HTML_TAG_RE = /<[^>]*>/
172
+ DEFAULT_SHADOWS = 'shadows'
173
+ HTML_TAG_RE = /<\s*[A-Za-z][^>]*>/
162
174
 
163
- def initialize file = nil
175
+ def initialize file = nil, shadows = nil
164
176
  file = DEFAULT_SET unless file
165
177
  @yaml = YAML.load_file "#{File.dirname(__FILE__)}/config/#{file}.yaml"
166
178
  @yaml.delete(:placeholder)
167
- @shadows = [HTML_TAG_RE, URI.regexp(['ftp', 'http', 'https', 'mailto'])]
179
+ shadows = DEFAULT_SHADOWS unless shadows
180
+ shadows = YAML.load_file("#{File.dirname(__FILE__)}/config/#{shadows}.yaml")
181
+ @shadows = ([
182
+ (shadows[:custom].map { |g| /#{g}/ } if shadows[:custom]),
183
+ (shadows[:grip].map { |g| /(?<=#{g})([^#{g}]*)(?=#{g})/m } if shadows[:grip]),
184
+ HTML_TAG_RE,
185
+ URI.regexp(['ftp', 'http', 'https', 'mailto'])
186
+ ] - [nil]).flatten
168
187
  end
169
188
 
170
189
  # Ready-to-use single instance
171
190
  @@instance = Parser.new
172
191
  end
173
192
 
174
- def self.parse str, lang: :default, shadows: []
175
- Parser.parse str, lang: lang, shadows: shadows
193
+ def self.parse str, lang: :default, shadows: [], sections: nil
194
+ Parser.parse str, lang: lang, shadows: shadows, sections: sections
176
195
  end
177
196
 
178
- def self.parse! str, lang: :default, shadows: []
179
- Parser.parse! str, lang: lang, shadows: shadows
197
+ def self.parse! str, lang: :default, shadows: [], sections: nil
198
+ Parser.parse! str, lang: lang, shadows: shadows, sections: sections
180
199
  end
181
200
 
182
201
  def self.is_ru? str, shadows: []
183
202
  Parser.is_ru? str, shadows: shadows
184
203
  end
204
+
205
+ def self.defuse str, elements, shadows: []
206
+ Parser.defuse str, elements, shadows: shadows
207
+ end
185
208
  end
186
209
 
@@ -1,21 +1,63 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  require 'i18n'
4
+ require 'base64'
4
5
  require_relative '../typogrowth'
5
6
 
6
7
  class String
8
+ PUNCTUATION = '¿?¡!()„“”‚‘’«».,:;'.split //
9
+
7
10
  # Typographs the string and returns the result
8
11
  # See Typogrowth::Parser#parse
9
- def typo lang = nil
10
- Typogrowth.parse(self, lang: lang ? lang : is_ru? ? "ru" : I18n.locale)
12
+ def typo lang: nil, sections: nil, shadows: nil
13
+ Typogrowth.parse(
14
+ self,
15
+ lang: lang ? lang : is_ru? ? "ru" : I18n.locale,
16
+ shadows: shadows,
17
+ sections: sections
18
+ )
11
19
  end
12
20
  # Typographs the string in place
13
21
  # See Typogrowth::Parser#parse!
14
- def typo! lang = nil
15
- Typogrowth.parse!(self, lang: lang ? lang : is_ru? ? "ru" : I18n.locale)
22
+ def typo! lang: nil, sections: nil, shadows: nil
23
+ Typogrowth.parse!(
24
+ self,
25
+ lang: lang ? lang : is_ru? ? "ru" : I18n.locale,
26
+ shadows: shadows,
27
+ sections: sections
28
+ )
16
29
  end
17
30
 
18
- def is_ru? shadows = []
31
+ def is_ru? shadows: []
19
32
  Typogrowth.is_ru? self, shadows: shadows
20
33
  end
34
+
35
+ def defuse elements = nil, shadows: []
36
+ Typogrowth.defuse self, elements || PUNCTUATION, shadows: shadows
37
+ end
38
+
39
+ def psub pattern, exclusion, replacement
40
+ delims = self.safe_delimiters
41
+ s = self.dup
42
+ [*exclusion].each { |re|
43
+ re = /#{re}/ unless Regexp === re
44
+ s.gsub!(re) { |m| "#{delims.first}#{Base64.encode64 m}#{delims.last}" }
45
+ }
46
+ s.gsub! pattern, replacement
47
+ s.gsub!(/#{delims.first}(.*?)#{delims.last}/m) { |m|
48
+ Base64.decode64(m).force_encoding('UTF-8')
49
+ }
50
+ s
51
+ end
52
+
53
+ # private
54
+
55
+ def safe_delimiters
56
+ delimiters = ['❮', '❯']
57
+ loop do
58
+ break delimiters unless self.match(/#{delimiters.join('|')}/)
59
+ delimiters.map! {|d| d*2}
60
+ end
61
+ end
62
+
21
63
  end
@@ -1,3 +1,3 @@
1
1
  module Typogrowth
2
- VERSION = "0.9.7"
2
+ VERSION = "0.9.8"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: typogrowth
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.7
4
+ version: 0.9.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexei Matyushkin
@@ -128,6 +128,7 @@ files:
128
128
  - features/step_definitions/typogrowth_steps.rb
129
129
  - features/support/env.rb
130
130
  - features/typogrowth.feature
131
+ - lib/config/shadows.yaml
131
132
  - lib/config/typogrowth.yaml
132
133
  - lib/typogrowth.rb
133
134
  - lib/typogrowth/string.rb