textutils 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8d8339bc5ddee05b87cc13348f12756866bb708f
4
- data.tar.gz: d363527756ea6b4e345cb0563b2558aa81717d75
3
+ metadata.gz: 0db02f95a1da720a3778b0e1dc3636f7318e6fa6
4
+ data.tar.gz: c3420d50bc0c25e3d7e6ced6b9959c01770d471b
5
5
  SHA512:
6
- metadata.gz: e47a8c0a60b9d91de8e7d9c8e9b620cd599ee004fa7d1b3b9e597e02c667aa9095a6e4be7595d6a980b66eb512b4804e03ea5ea4f393d7f4294ea6f15b2f76c9
7
- data.tar.gz: 0e084073ee1e7cfad863122c82bcb4285c910017f75055dfaa52c93decdad24d5c98d7d3d2639b1450a289362ef1dcb03e9dce3b491c8224ba5ec2f5f077a072
6
+ metadata.gz: 690e28c6bb15160be57234c3092ef2db54eabfa5d7b66f6ed58dbcd6d5b876451654833c9c0334d55921571fb8498942ae02e8475d437dbd3944ba78afd340ab
7
+ data.tar.gz: 7348393e33bd3813c55c5b1360ec524836aa1cffb545e53597b599d89f009505c64560df2f705fac44c687644cfcc404aa2a653de8fdc8c6b054f0ae192c144f
@@ -1,61 +1,63 @@
1
- HISTORY.md
2
- Manifest.txt
3
- README.md
4
- Rakefile
5
- lib/textutils.rb
6
- lib/textutils/classifier.rb
7
- lib/textutils/core_ext/array.rb
8
- lib/textutils/core_ext/file.rb
9
- lib/textutils/core_ext/time.rb
10
- lib/textutils/filter/code_filter.rb
11
- lib/textutils/filter/comment_filter.rb
12
- lib/textutils/filter/erb_django_filter.rb
13
- lib/textutils/filter/erb_filter.rb
14
- lib/textutils/filter/string_filter.rb
15
- lib/textutils/helper/address_helper.rb
16
- lib/textutils/helper/date_helper.rb
17
- lib/textutils/helper/hypertext_helper.rb
18
- lib/textutils/helper/tag_helper.rb
19
- lib/textutils/helper/title_helper.rb
20
- lib/textutils/helper/unicode_helper.rb
21
- lib/textutils/helper/value_helper_i.rb
22
- lib/textutils/helper/value_helper_ii.rb
23
- lib/textutils/helper/value_helper_iii_numbers.rb
24
- lib/textutils/helper/xml_helper.rb
25
- lib/textutils/page.rb
26
- lib/textutils/parser/name_parser.rb
27
- lib/textutils/parser/name_tokenizer.rb
28
- lib/textutils/patterns.rb
29
- lib/textutils/reader/block_reader.rb
30
- lib/textutils/reader/code_reader.rb
31
- lib/textutils/reader/fixture_reader.rb
32
- lib/textutils/reader/hash_reader.rb
33
- lib/textutils/reader/line_reader.rb
34
- lib/textutils/reader/tree_reader.rb
35
- lib/textutils/reader/values_reader.rb
36
- lib/textutils/sanitizier.rb
37
- lib/textutils/title.rb
38
- lib/textutils/title_mapper.rb
39
- lib/textutils/utils.rb
40
- lib/textutils/version.rb
41
- test/data/at-austria/1--n-niederoesterreich/orte.txt
42
- test/data/cl_all.txt
43
- test/data/de-deutschland/3--by-bayern/4--oberfranken/orte.txt
44
- test/data/de-deutschland/3--by-bayern/4--oberfranken/orte_ii.txt
45
- test/data/de-deutschland/orte.txt
46
- test/data/feedburner.txt
47
- test/helper.rb
48
- test/test_address_helper.rb
49
- test/test_asciify.rb
50
- test/test_block_reader.rb
51
- test/test_fixture_reader.rb
52
- test/test_hypertext_helper.rb
53
- test/test_slugify.rb
54
- test/test_taglist.rb
55
- test/test_title_finder.rb
56
- test/test_title_helper.rb
57
- test/test_title_mapper.rb
58
- test/test_tree_reader.rb
59
- test/test_tree_reader_ii.rb
60
- test/test_unicode_helper.rb
61
- test/test_values_reader.rb
1
+ HISTORY.md
2
+ Manifest.txt
3
+ README.md
4
+ Rakefile
5
+ lib/textutils.rb
6
+ lib/textutils/classifier.rb
7
+ lib/textutils/core_ext/array.rb
8
+ lib/textutils/core_ext/file.rb
9
+ lib/textutils/core_ext/time.rb
10
+ lib/textutils/filter/code_filter.rb
11
+ lib/textutils/filter/comment_filter.rb
12
+ lib/textutils/filter/erb_django_filter.rb
13
+ lib/textutils/filter/erb_filter.rb
14
+ lib/textutils/filter/string_filter.rb
15
+ lib/textutils/helper/address_helper.rb
16
+ lib/textutils/helper/date_helper.rb
17
+ lib/textutils/helper/hypertext_helper.rb
18
+ lib/textutils/helper/tag_helper.rb
19
+ lib/textutils/helper/title_helper.rb
20
+ lib/textutils/helper/unicode_helper.rb
21
+ lib/textutils/helper/value_helper_i.rb
22
+ lib/textutils/helper/value_helper_ii.rb
23
+ lib/textutils/helper/value_helper_iii_numbers.rb
24
+ lib/textutils/helper/xml_helper.rb
25
+ lib/textutils/page.rb
26
+ lib/textutils/parser/name_parser.rb
27
+ lib/textutils/parser/name_tokenizer.rb
28
+ lib/textutils/patterns.rb
29
+ lib/textutils/reader/block_reader.rb
30
+ lib/textutils/reader/code_reader.rb
31
+ lib/textutils/reader/fixture_reader.rb
32
+ lib/textutils/reader/hash_reader.rb
33
+ lib/textutils/reader/line_reader.rb
34
+ lib/textutils/reader/tree_reader.rb
35
+ lib/textutils/reader/values_reader.rb
36
+ lib/textutils/sanitizier.rb
37
+ lib/textutils/title.rb
38
+ lib/textutils/title_mapper.rb
39
+ lib/textutils/title_mapper2.rb
40
+ lib/textutils/utils.rb
41
+ lib/textutils/version.rb
42
+ test/data/at-austria/1--n-niederoesterreich/orte.txt
43
+ test/data/cl_all.txt
44
+ test/data/de-deutschland/3--by-bayern/4--oberfranken/orte.txt
45
+ test/data/de-deutschland/3--by-bayern/4--oberfranken/orte_ii.txt
46
+ test/data/de-deutschland/orte.txt
47
+ test/data/feedburner.txt
48
+ test/helper.rb
49
+ test/test_address_helper.rb
50
+ test/test_asciify.rb
51
+ test/test_block_reader.rb
52
+ test/test_fixture_reader.rb
53
+ test/test_hypertext_helper.rb
54
+ test/test_slugify.rb
55
+ test/test_taglist.rb
56
+ test/test_title_finder.rb
57
+ test/test_title_helper.rb
58
+ test/test_title_mapper.rb
59
+ test/test_title_mapper2.rb
60
+ test/test_tree_reader.rb
61
+ test/test_tree_reader_ii.rb
62
+ test/test_unicode_helper.rb
63
+ test/test_values_reader.rb
data/Rakefile CHANGED
@@ -21,7 +21,7 @@ Hoe.spec 'textutils' do
21
21
  ['props', '>=1.1.2'],
22
22
  ['logutils', '>=0.6.1'],
23
23
  ### 3rd party gems
24
- ['rubyzip'], ## todo/check: make optional -why? why not??
24
+ ['rubyzip', '>=1.0.0'], ## note: 1.0 changed to require zip (pre 1.0 was zip/zip); todo/check: make optional -why? why not??
25
25
  ['activesupport'] ## todo/check: really needed? document what methods get used
26
26
  ]
27
27
 
@@ -66,6 +66,7 @@ require 'textutils/reader/tree_reader'
66
66
  require 'textutils/classifier'
67
67
  require 'textutils/title' # title table/mapper/finder utils
68
68
  require 'textutils/title_mapper'
69
+ require 'textutils/title_mapper2'
69
70
 
70
71
  require 'textutils/page' # for book pages and page templates
71
72
 
@@ -0,0 +1,168 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ ## see textutils/title.rb
5
+ ## for existing code
6
+ ## move over here
7
+
8
+
9
+ module TextUtils
10
+
11
+ class TitleMapper2 ## todo/check: rename to NameMapper ? why? why not??
12
+
13
+ include LogUtils::Logging
14
+
15
+ attr_reader :known_titles ## rename to mapping or mappings or just titles - why? why not?
16
+
17
+ ##
18
+ ## key: e.g. augsburg
19
+ ## title: e.g. FC Augsburg
20
+ ## length (of title - not pattern): e.g. 11 -- do not count dots (e.g. U.S.A. => 3 or 6) why? why not?
21
+ MappingStruct = Struct.new( :key, :title, :length, :pattern) ## todo/check: use (rename to) TitleStruct - why? why not??
22
+
23
+
24
+ def initialize( records, tag )
25
+ @known_titles = build_title_table_for( records ) ## build mapping lookup table
26
+
27
+ ## todo: rename tag to attrib or attrib_name - why ?? why not ???
28
+ @tag = tag # e.g. tag name use for @@brewery@@ @@team@@ etc.
29
+ end
30
+
31
+
32
+ def map_titles!( line ) ## rename to just map! - why?? why not???
33
+ begin
34
+ found = map_title_for!( @tag, line, @known_titles )
35
+ end while found
36
+ end
37
+
38
+ def find_key!( line )
39
+ find_key_for!( @tag, line )
40
+ end
41
+
42
+ def find_keys!( line ) # NB: keys (plural!) - will return array
43
+ counter = 1
44
+ keys = []
45
+
46
+ key = find_key_for!( "#{@tag}#{counter}", line )
47
+ while key.present?
48
+ keys << key
49
+ counter += 1
50
+ key = find_key_for!( "#{@tag}#{counter}", line )
51
+ end
52
+ keys
53
+ end
54
+
55
+
56
+ private
57
+ def build_title_table_for( records )
58
+
59
+ ## build known titles table w/ synonyms e.g.
60
+ #
61
+ # [[ 'wolfsburg', 'VfL Wolfsburg'],
62
+ # [ 'augsburg', 'FC Augsburg'],
63
+ # [ 'augsburg', 'Augi2'],
64
+ # [ 'augsburg', 'Augi3' ],
65
+ # [ 'stuttgart', 'VfB Stuttgart']]
66
+
67
+ known_titles = []
68
+
69
+ records.each_with_index do |rec,index|
70
+
71
+ title_candidates = []
72
+ title_candidates << rec.title
73
+
74
+ title_candidates += rec.synonyms.split('|') if rec.synonyms.present?
75
+
76
+
77
+ ## check if title includes subtitle e.g. Grand Prix Japan (Suzuka Circuit)
78
+ # make subtitle optional by adding title w/o subtitle e.g. Grand Prix Japan
79
+
80
+ titles = []
81
+ title_candidates.each do |t|
82
+ titles << t
83
+ if t =~ /\(.+\)/
84
+ extra_title = t.gsub( /\(.+\)/, '' ) # remove/delete subtitles
85
+ # note: strip leading and trailing whitespace too!
86
+ # -- todo: add squish or something if () is inline e.g. leaves two spaces?
87
+ extra_title.strip!
88
+ titles << extra_title
89
+ end
90
+ end
91
+
92
+ titles.each do |t|
93
+ m = MappingStruct.new
94
+ m.key = rec.key
95
+ m.title = t
96
+ m.length = t.length
97
+ ## note: escape for regex plus allow subs for special chars/accents
98
+ m.pattern = TextUtils.title_esc_regex( t )
99
+
100
+ known_titles << m
101
+ end
102
+
103
+ logger.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{titles.join('|')}<"
104
+
105
+ ## NB: only include code field - if defined
106
+ if rec.respond_to?(:code) && rec.code.present?
107
+ m = MappingStruct.new
108
+ m.key = rec.key
109
+ m.title = rec.code
110
+ m.length = rec.code.length
111
+ m.pattern = rec.code ## note: use code for now as is (no variants allowed for now)
112
+
113
+ known_titles << m
114
+ end
115
+ end
116
+
117
+ ## note: sort here by length (largest goes first - best match)
118
+ # exclude code and key (key should always go last)
119
+ known_titles = known_titles.sort { |left,right| right.length <=> left.length }
120
+ known_titles
121
+ end
122
+
123
+
124
+ def map_title_for!( tag, line, mappings )
125
+
126
+ downcase_tag = tag.downcase
127
+
128
+ mappings.each do |mapping|
129
+
130
+ key = mapping.key
131
+ value = mapping.pattern
132
+ ## nb: \b does NOT include space or newline for word boundary (only alphanums e.g. a-z0-9)
133
+ ## (thus add it, allows match for Benfica Lis. for example - note . at the end)
134
+
135
+ ## check add $ e.g. (\b| |\t|$) does this work? - check w/ Benfica Lis.$
136
+ regex = /\b#{value}(\b| |\t|$)/ # wrap with word boundary (e.g. match only whole words e.g. not wac in wacker)
137
+ if line =~ regex
138
+ logger.debug " match for #{downcase_tag} >#{key}< >#{value}<"
139
+ # make sure @@oo{key}oo@@ doesn't match itself with other key e.g. wacker, wac, etc.
140
+ line.sub!( regex, "@@oo#{key}oo@@ " ) # NB: add one space char at end
141
+ return true # break out after first match (do NOT continue)
142
+ end
143
+ end
144
+ return false
145
+ end
146
+
147
+
148
+ def find_key_for!( tag, line )
149
+ regex = /@@oo([^@]+?)oo@@/ # e.g. everything in @@ .... @@ (use non-greedy +? plus all chars but not @, that is [^@])
150
+
151
+ upcase_tag = tag.upcase
152
+ downcase_tag = tag.downcase
153
+
154
+ if line =~ regex
155
+ value = "#{$1}"
156
+ logger.debug " #{downcase_tag}: >#{value}<"
157
+
158
+ line.sub!( regex, "[#{upcase_tag}]" )
159
+
160
+ return $1
161
+ else
162
+ return nil
163
+ end
164
+ end # method find_key_for!
165
+
166
+
167
+ end # class TitleMapper2
168
+ end # module TextUtils
@@ -3,8 +3,8 @@
3
3
  module TextUtils
4
4
 
5
5
  MAJOR = 1 ## todo: namespace inside version or something - why? why not??
6
- MINOR = 3
7
- PATCH = 1
6
+ MINOR = 4
7
+ PATCH = 0
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
@@ -0,0 +1,45 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_title_mapper2.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+
11
+ class TestTitleMapper2 < Minitest::Test
12
+
13
+ ClubStruct = Struct.new(:key, :title, :synonyms)
14
+
15
+ def test_title_table
16
+
17
+ titles_in = [
18
+ ClubStruct.new( 'barcelona', 'Barcelona', 'FC Barcelona' ),
19
+ ClubStruct.new( 'espanyol', 'Espanyol', 'RCD Espanyol|Espanyol Barcelona' ),
20
+ ClubStruct.new( 'sevilla', 'Sevilla', 'Sevilla FC' )
21
+ ]
22
+
23
+ mapper = TextUtils::TitleMapper2.new( titles_in, 'club' )
24
+ titles_out = mapper.known_titles
25
+
26
+ puts 'titles_out:'
27
+ pp titles_out
28
+
29
+ line = "Espanyol Barcelona 1-0 FC Barcelona"
30
+ mapper.map_titles!( line )
31
+ puts "=> #{line}"
32
+
33
+ club1 = mapper.find_key!( line )
34
+ club2 = mapper.find_key!( line )
35
+ puts "=> #{line}"
36
+
37
+ assert_equal 'espanyol', club1
38
+ assert_equal 'barcelona', club2
39
+
40
+ assert true ## assume everything ok if we get here
41
+
42
+ end # method test_title_table
43
+
44
+
45
+ end # class TestTitleMapper2
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textutils
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.1
4
+ version: 1.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-06-15 00:00:00.000000000 Z
11
+ date: 2015-11-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: props
@@ -44,14 +44,14 @@ dependencies:
44
44
  requirements:
45
45
  - - ">="
46
46
  - !ruby/object:Gem::Version
47
- version: '0'
47
+ version: 1.0.0
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
- version: '0'
54
+ version: 1.0.0
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: activesupport
57
57
  requirement: !ruby/object:Gem::Requirement
@@ -86,14 +86,14 @@ dependencies:
86
86
  requirements:
87
87
  - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: '3.13'
89
+ version: '3.14'
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
- version: '3.13'
96
+ version: '3.14'
97
97
  description: textutils - Text Filters, Helpers, Readers and More
98
98
  email: ruby-talk@ruby-lang.org
99
99
  executables: []
@@ -103,7 +103,6 @@ extra_rdoc_files:
103
103
  - Manifest.txt
104
104
  - README.md
105
105
  files:
106
- - ".gemtest"
107
106
  - HISTORY.md
108
107
  - Manifest.txt
109
108
  - README.md
@@ -142,6 +141,7 @@ files:
142
141
  - lib/textutils/sanitizier.rb
143
142
  - lib/textutils/title.rb
144
143
  - lib/textutils/title_mapper.rb
144
+ - lib/textutils/title_mapper2.rb
145
145
  - lib/textutils/utils.rb
146
146
  - lib/textutils/version.rb
147
147
  - test/data/at-austria/1--n-niederoesterreich/orte.txt
@@ -161,6 +161,7 @@ files:
161
161
  - test/test_title_finder.rb
162
162
  - test/test_title_helper.rb
163
163
  - test/test_title_mapper.rb
164
+ - test/test_title_mapper2.rb
164
165
  - test/test_tree_reader.rb
165
166
  - test/test_tree_reader_ii.rb
166
167
  - test/test_unicode_helper.rb
@@ -187,22 +188,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
187
188
  version: '0'
188
189
  requirements: []
189
190
  rubyforge_project:
190
- rubygems_version: 2.4.2
191
+ rubygems_version: 2.2.3
191
192
  signing_key:
192
193
  specification_version: 4
193
194
  summary: textutils - Text Filters, Helpers, Readers and More
194
- test_files:
195
- - test/test_title_finder.rb
196
- - test/test_fixture_reader.rb
197
- - test/test_unicode_helper.rb
198
- - test/test_asciify.rb
199
- - test/test_tree_reader.rb
200
- - test/test_title_mapper.rb
201
- - test/test_values_reader.rb
202
- - test/test_taglist.rb
203
- - test/test_hypertext_helper.rb
204
- - test/test_title_helper.rb
205
- - test/test_slugify.rb
206
- - test/test_address_helper.rb
207
- - test/test_block_reader.rb
208
- - test/test_tree_reader_ii.rb
195
+ test_files: []
data/.gemtest DELETED
File without changes