typogrowth 0.9.7 → 0.9.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
- metadata.gz: 53a02d4e8ed6d038744689a72340501f7320edbd
4
- data.tar.gz: 13da2fe67adcd6bfb6794f97371badaf639bf16a
3
+ metadata.gz: d51d831d3067a678c4d7f440f08532b0aadcf108
4
+ data.tar.gz: 296addd6996aafb254597f7c81092edd1fa75241
5
5
  !binary "U0hBNTEy":
6
- metadata.gz: 1843bc3d26f9a29e4c5ae603a1778ead2534007a0343723445c114440551198999db0420d7abfc084c7ea0e5d60d25bec38437a112a26cda8f1e26332e46926a
7
- data.tar.gz: 897f8982e04275fc5529ba666c1317493d43f3e9a8a5159a50aad8d07b9aad4d1d209402c6261800584fbf44ac51448364fb78372cc572f2cfdc3b8536acc5ef
6
+ metadata.gz: 2e385ed3b085ebdf1a50b67003f42e5184ca7a7f047272aac1391ecc72a082cef9b5a462a098cce8bb283a0c0a82b8986552c43779b4ebcfb5231b4ec43bf4c2
7
+ data.tar.gz: babea8b38641f4da8ba5f90907f9b6aba0d86316a2ba2dc8239a5caecf98fd2b51dc87b9ec52483e27c3137f5724e65b020ab945f432ff952a3458fa6f9d9cc3
@@ -4,6 +4,10 @@ Given(/^the input string is "(.*?)"$/) do |str|
4
4
  @content = str
5
5
  end
6
6
 
7
+ Given(/^the input string is$/) do |str|
8
+ @content = str
9
+ end
10
+
7
11
  When(/^input string is processed with Typogrowl’s typography parser$/) do
8
12
  @content.gsub! /\\+"/, '"'
9
13
  @typo = Typogrowth.parse @content
@@ -14,6 +18,11 @@ When(/^input string is processed with Typogrowl’s typography parser with lang
14
18
  @typo = Typogrowth.parse @content, lang: lang
15
19
  end
16
20
 
21
+ When(/^input string is processed with Typogrowl’s typography parser with section "(.*?)"$/) do |sect|
22
+ @content.gsub! /\\+"/, '"'
23
+ @typo = @content.typo sections: sect.to_sym
24
+ end
25
+
17
26
  When(/^input string is modified inplace with typo!$/) do
18
27
  @typoed = @content.dup
19
28
  @typoed.typo!
@@ -31,12 +40,16 @@ Then(/^the typoed result should equal to "(.*?)"$/) do |str|
31
40
  @typo.should == str
32
41
  end
33
42
 
43
+ Then(/^the typoed result should equal to$/) do |str|
44
+ @typo.should == str
45
+ end
46
+
34
47
  Then(/^the call to string’s typo should equal to "(.*?)"$/) do |str|
35
48
  @content.typo.should == str
36
49
  end
37
50
 
38
51
  Then(/^the call to string’s typo with lang "(.*?)" should equal to "(.*?)"$/) do |lang, str|
39
- @content.typo('ru').should == str
52
+ @content.typo(lang: 'ru').should == str
40
53
  end
41
54
 
42
55
  Then(/^typoed result should equal to "(.*?)"$/) do |str|
@@ -76,7 +76,8 @@ Feature: Text is to be typographed (spacing and punctuation are to be sanitized)
76
76
 
77
77
  Examples:
78
78
  | input | output |
79
- | "<p><img src="http://mudasobwa.ru/i/self.jpg">Here: http://wikipedia.ru</p>" | "<p><img src="http://mudasobwa.ru/i/self.jpg">Here: http://wikipedia.ru</p>" |
79
+ | "<p><img src="http://mudasobwa.ru/i/self.jpg">Here: http://wikipedia.ru</p>" | "<p><img src="http://mudasobwa.ru/i/self.jpg">Here: http://wikipedia.ru</p>" |
80
+ | "<p>http://mudasobwa.ru/i/self.jpg With caption<br/> <small><a href='http://wikipedia.ru'>Wiki</a></small> </p>" | "<p>http://mudasobwa.ru/i/self.jpg With caption<br/> <small><a href='http://wikipedia.ru'>Wiki</a></small> </p>" |
80
81
 
81
82
  Scenario Outline: Language recognition
82
83
  Given the input string is <input>
@@ -98,3 +99,41 @@ Feature: Text is to be typographed (spacing and punctuation are to be sanitized)
98
99
  | "Here 'you' go." | "Here “you” go." |
99
100
  | "Тут 'русский' язык." | "Тут «русский» язык." |
100
101
 
102
+ Scenario Outline: Section pick-up
103
+ Given the input string is <input>
104
+ When input string is processed with Typogrowl’s typography parser with section "quotes"
105
+ Then the typoed result should equal to <output>
106
+
107
+ Examples:
108
+ | input | output |
109
+ | "Here 'you' - go." | "Here “you” - go." |
110
+ | "Тут 'русский' --- язык." | "Тут «русский» --- язык." |
111
+
112
+ Scenario Outline: Predefined shadows
113
+ Given the input string is <input>
114
+ When input string is processed with Typogrowl’s typography parser
115
+ Then the typoed result should equal to <output>
116
+
117
+ Examples:
118
+ | input | output |
119
+ | "This is λsystem.dllλ file." | "This is λsystem.dllλ file." |
120
+ | "This is ✓8:35✓ time." | "This is ✓8:35✓ time." |
121
+
122
+ Scenario: Inplace tags
123
+ Given the input string is
124
+ """
125
+ http://qipowl.herokuapp.com/images/owl.png
126
+
127
+ ☞ Video embedded:
128
+
129
+ http://www.youtube.com/watch?v=KFKxlYNfT_o
130
+ """
131
+ When input string is processed with Typogrowl’s typography parser
132
+ Then the typoed result should equal to
133
+ """
134
+ http://qipowl.herokuapp.com/images/owl.png
135
+
136
+ ☞ Video embedded:
137
+
138
+ http://www.youtube.com/watch?v=KFKxlYNfT_o
139
+ """
@@ -0,0 +1,3 @@
1
+ :grip :
2
+ - 'λ'
3
+ - '✓'
@@ -1,4 +1,4 @@
1
- :quotes :
1
+ :sequence :
2
2
  :punctuation :
3
3
  :re : '(?<quote>''|"|\))\s*(?<punct>[.,!?]+)'
4
4
  :default :
@@ -6,6 +6,7 @@
6
6
  :ru :
7
7
  - '\k<quote>\k<punct>'
8
8
 
9
+ :quotes :
9
10
  # That's a 6.3" man, he sees sunsets at 10°20'30" E.
10
11
  # ⇑
11
12
  :inch :
@@ -78,7 +79,6 @@
78
79
  :us :
79
80
  - '’'
80
81
  - '”'
81
-
82
82
  # That's a 6.3" man, he sees sunsets at 10°20'30" E.
83
83
  # ⇑
84
84
  :apostrophe_post :
@@ -113,12 +113,24 @@
113
113
  - '”'
114
114
 
115
115
  :punctuation :
116
+ :bloody_quote_en :
117
+ :re : '(“)(?:\s+)'
118
+ :default :
119
+ - '\1'
120
+ :ru :
121
+ - '\1 '
122
+ :bloody_quote_ru :
123
+ :re : '(?:\s+)(“)'
124
+ :default :
125
+ - ' \1'
126
+ :ru :
127
+ - '\1'
116
128
  :opening_orphan :
117
- :re : '([(¿¡§#№]|\p{Sc})(?:\s+)'
129
+ :re : '([(¿¡§#№‘„«]|\p{Sc})(?:\s+)'
118
130
  :default :
119
131
  - '\1'
120
132
  :closing_orphan :
121
- :re : '(?:\s+)([.,:;!?)])'
133
+ :re : '(?:\s+)([.,:;!?)”’»])'
122
134
  :default :
123
135
  - '\1'
124
136
  :closing_clamped :
@@ -134,7 +146,7 @@
134
146
  :default :
135
147
  - '…'
136
148
  :mdash :
137
- :re : '\s*(?<dash> - |--|–|—)\s*'
149
+ :re : '\p{Space}*(?<dash>\s-\s|--|–|—)\s*'
138
150
  :default :
139
151
  - '—'
140
152
  :ru :
@@ -153,6 +165,6 @@
153
165
  :default :
154
166
  - ' \1'
155
167
  :extra_spaces :
156
- :re : '\s+'
168
+ :re : ' +'
157
169
  :default :
158
170
  - ' '
@@ -33,14 +33,6 @@ module Typogrowth
33
33
  class Parser
34
34
  attr_reader :yaml, :shadows
35
35
 
36
- def self.safe_delimiters str
37
- delimiters = ['❮', '❯']
38
- loop do
39
- break delimiters unless str.match(/#{delimiters.join('|')}/)
40
- delimiters.map! {|d| d*2}
41
- end
42
- end
43
-
44
36
  #
45
37
  # Recursively merges the initial settings with custom.
46
38
  #
@@ -82,14 +74,15 @@ module Typogrowth
82
74
  # @param str [String] the string to be typographyed inplace
83
75
  # @param lang the language to use rules for
84
76
  #
85
- def parse str, lang: :default, shadows: []
77
+ def parse str, lang: :default, shadows: [], sections: nil
86
78
  lang = lang.to_sym
87
- delims = Parser.safe_delimiters str
79
+ delims = str.safe_delimiters
88
80
  str.split(/\R{2,}/).map { |para|
89
- @shadows.concat([*shadows]).uniq.each { |re|
81
+ [*shadows].concat(@shadows).uniq.each { |re|
90
82
  para.gsub!(re) { |m| "#{delims.first}#{Base64.encode64 m}#{delims.last}" }
91
83
  }
92
84
  @yaml.each { |key, values|
85
+ next if sections && ![*sections].include?(key)
93
86
  values.each { |k, v|
94
87
  if !!v[:re]
95
88
  v[lang] = v[:default] if (!v[lang] || v[lang].size.zero?)
@@ -128,12 +121,25 @@ module Typogrowth
128
121
  end
129
122
 
130
123
  def is_ru? str, shadows: []
131
- clean = @shadows.concat([*shadows]).uniq.inject(str) { |memo, re|
124
+ clean = [*shadows].concat(@shadows).uniq.inject(str) { |memo, re|
132
125
  memo.gsub(re, '')
133
126
  }
134
127
  clean.scan(/[А-Яа-я]/).size > clean.length / 3
135
128
  end
136
129
 
130
+ def defuse str, elements, shadows: []
131
+ delims = str.safe_delimiters
132
+ s = str.dup
133
+ [*shadows].concat(@shadows).uniq.each { |re|
134
+ s.gsub!(re) { |m| "#{delims.first}#{Base64.encode64 m}#{delims.last}" }
135
+ }
136
+ # s.gsub(Regexp.union(elements), ' \1 ')
137
+ s.gsub(/(#{elements.map {|e| Regexp.escape e}.join('|')})/, ' \1 ')
138
+ .gsub(/#{delims.first}(.*?)#{delims.last}/m) { |m|
139
+ Base64.decode64(m).force_encoding('UTF-8')
140
+ }
141
+ end
142
+
137
143
  def add_shadows re
138
144
  @shadows.concat [*re]
139
145
  end
@@ -143,13 +149,13 @@ module Typogrowth
143
149
  end
144
150
 
145
151
  # Out-of-place version of `String` typographing. See #parse!
146
- def self.parse str, lang: :default, shadows: []
147
- Parser.new.parse str, lang: lang, shadows: shadows
152
+ def self.parse str, lang: :default, shadows: [], sections: nil
153
+ Parser.new.parse str, lang: lang, shadows: shadows, sections: sections
148
154
  end
149
155
 
150
156
  # In-place version of `String` typographing: replaces the content of `str` with the parsed result. See #parse
151
- def self.parse! str, lang: :default, shadows: []
152
- str.replace self.parse str, lang: lang, shadows: shadows
157
+ def self.parse! str, lang: :default, shadows: [], sections: nil
158
+ str.replace self.parse str, lang: lang, shadows: shadows, sections: sections
153
159
  end
154
160
 
155
161
  # Out-of-place version of `String` typographing. See #parse!
@@ -157,30 +163,47 @@ module Typogrowth
157
163
  @@instance.is_ru? str, shadows: shadows
158
164
  end
159
165
 
166
+ # Returns a copy of `str` with every occurrence of the given elements surrounded by spaces; fragments matched by shadows are left untouched. See #defuse
167
+ def self.defuse str, elements, shadows: []
168
+ Parser.new.defuse str, elements, shadows: shadows
169
+ end
170
+
160
171
  DEFAULT_SET = 'typogrowth'
161
- HTML_TAG_RE = /<[^>]*>/
172
+ DEFAULT_SHADOWS = 'shadows'
173
+ HTML_TAG_RE = /<\s*[A-Za-z][^>]*>/
162
174
 
163
- def initialize file = nil
175
+ def initialize file = nil, shadows = nil
164
176
  file = DEFAULT_SET unless file
165
177
  @yaml = YAML.load_file "#{File.dirname(__FILE__)}/config/#{file}.yaml"
166
178
  @yaml.delete(:placeholder)
167
- @shadows = [HTML_TAG_RE, URI.regexp(['ftp', 'http', 'https', 'mailto'])]
179
+ shadows = DEFAULT_SHADOWS unless shadows
180
+ shadows = YAML.load_file("#{File.dirname(__FILE__)}/config/#{shadows}.yaml")
181
+ @shadows = ([
182
+ (shadows[:custom].map { |g| /#{g}/ } if shadows[:custom]),
183
+ (shadows[:grip].map { |g| /(?<=#{g})([^#{g}]*)(?=#{g})/m } if shadows[:grip]),
184
+ HTML_TAG_RE,
185
+ URI.regexp(['ftp', 'http', 'https', 'mailto'])
186
+ ] - [nil]).flatten
168
187
  end
169
188
 
170
189
  # Ready-to-use single instance
171
190
  @@instance = Parser.new
172
191
  end
173
192
 
174
- def self.parse str, lang: :default, shadows: []
175
- Parser.parse str, lang: lang, shadows: shadows
193
+ def self.parse str, lang: :default, shadows: [], sections: nil
194
+ Parser.parse str, lang: lang, shadows: shadows, sections: sections
176
195
  end
177
196
 
178
- def self.parse! str, lang: :default, shadows: []
179
- Parser.parse! str, lang: lang, shadows: shadows
197
+ def self.parse! str, lang: :default, shadows: [], sections: nil
198
+ Parser.parse! str, lang: lang, shadows: shadows, sections: sections
180
199
  end
181
200
 
182
201
  def self.is_ru? str, shadows: []
183
202
  Parser.is_ru? str, shadows: shadows
184
203
  end
204
+
205
+ def self.defuse str, elements, shadows: []
206
+ Parser.defuse str, elements, shadows: shadows
207
+ end
185
208
  end
186
209
 
@@ -1,21 +1,63 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  require 'i18n'
4
+ require 'base64'
4
5
  require_relative '../typogrowth'
5
6
 
6
7
  class String
8
+ PUNCTUATION = '¿?¡!()„“”‚‘’«».,:;'.split //
9
+
7
10
  # Typographs the string and returns the result
8
11
  # See Typogrowth::Parser#parse
9
- def typo lang = nil
10
- Typogrowth.parse(self, lang: lang ? lang : is_ru? ? "ru" : I18n.locale)
12
+ def typo lang: nil, sections: nil, shadows: nil
13
+ Typogrowth.parse(
14
+ self,
15
+ lang: lang ? lang : is_ru? ? "ru" : I18n.locale,
16
+ shadows: shadows,
17
+ sections: sections
18
+ )
11
19
  end
12
20
  # Typographs the string in place
13
21
  # See Typogrowth::Parser#parse!
14
- def typo! lang = nil
15
- Typogrowth.parse!(self, lang: lang ? lang : is_ru? ? "ru" : I18n.locale)
22
+ def typo! lang: nil, sections: nil, shadows: nil
23
+ Typogrowth.parse!(
24
+ self,
25
+ lang: lang ? lang : is_ru? ? "ru" : I18n.locale,
26
+ shadows: shadows,
27
+ sections: sections
28
+ )
16
29
  end
17
30
 
18
- def is_ru? shadows = []
31
+ def is_ru? shadows: []
19
32
  Typogrowth.is_ru? self, shadows: shadows
20
33
  end
34
+
35
+ def defuse elements = nil, shadows: []
36
+ Typogrowth.defuse self, elements || PUNCTUATION, shadows: shadows
37
+ end
38
+
39
+ def psub pattern, exclusion, replacement
40
+ delims = self.safe_delimiters
41
+ s = self.dup
42
+ [*exclusion].each { |re|
43
+ re = /#{re}/ unless Regexp === re
44
+ s.gsub!(re) { |m| "#{delims.first}#{Base64.encode64 m}#{delims.last}" }
45
+ }
46
+ s.gsub! pattern, replacement
47
+ s.gsub!(/#{delims.first}(.*?)#{delims.last}/m) { |m|
48
+ Base64.decode64(m).force_encoding('UTF-8')
49
+ }
50
+ s
51
+ end
52
+
53
+ # private
54
+
55
+ def safe_delimiters
56
+ delimiters = ['❮', '❯']
57
+ loop do
58
+ break delimiters unless self.match(/#{delimiters.join('|')}/)
59
+ delimiters.map! {|d| d*2}
60
+ end
61
+ end
62
+
21
63
  end
@@ -1,3 +1,3 @@
1
1
  module Typogrowth
2
- VERSION = "0.9.7"
2
+ VERSION = "0.9.8"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: typogrowth
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.7
4
+ version: 0.9.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexei Matyushkin
@@ -128,6 +128,7 @@ files:
128
128
  - features/step_definitions/typogrowth_steps.rb
129
129
  - features/support/env.rb
130
130
  - features/typogrowth.feature
131
+ - lib/config/shadows.yaml
131
132
  - lib/config/typogrowth.yaml
132
133
  - lib/typogrowth.rb
133
134
  - lib/typogrowth/string.rb