textutils 1.3.1 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8d8339bc5ddee05b87cc13348f12756866bb708f
4
- data.tar.gz: d363527756ea6b4e345cb0563b2558aa81717d75
3
+ metadata.gz: 0db02f95a1da720a3778b0e1dc3636f7318e6fa6
4
+ data.tar.gz: c3420d50bc0c25e3d7e6ced6b9959c01770d471b
5
5
  SHA512:
6
- metadata.gz: e47a8c0a60b9d91de8e7d9c8e9b620cd599ee004fa7d1b3b9e597e02c667aa9095a6e4be7595d6a980b66eb512b4804e03ea5ea4f393d7f4294ea6f15b2f76c9
7
- data.tar.gz: 0e084073ee1e7cfad863122c82bcb4285c910017f75055dfaa52c93decdad24d5c98d7d3d2639b1450a289362ef1dcb03e9dce3b491c8224ba5ec2f5f077a072
6
+ metadata.gz: 690e28c6bb15160be57234c3092ef2db54eabfa5d7b66f6ed58dbcd6d5b876451654833c9c0334d55921571fb8498942ae02e8475d437dbd3944ba78afd340ab
7
+ data.tar.gz: 7348393e33bd3813c55c5b1360ec524836aa1cffb545e53597b599d89f009505c64560df2f705fac44c687644cfcc404aa2a653de8fdc8c6b054f0ae192c144f
@@ -1,61 +1,63 @@
1
- HISTORY.md
2
- Manifest.txt
3
- README.md
4
- Rakefile
5
- lib/textutils.rb
6
- lib/textutils/classifier.rb
7
- lib/textutils/core_ext/array.rb
8
- lib/textutils/core_ext/file.rb
9
- lib/textutils/core_ext/time.rb
10
- lib/textutils/filter/code_filter.rb
11
- lib/textutils/filter/comment_filter.rb
12
- lib/textutils/filter/erb_django_filter.rb
13
- lib/textutils/filter/erb_filter.rb
14
- lib/textutils/filter/string_filter.rb
15
- lib/textutils/helper/address_helper.rb
16
- lib/textutils/helper/date_helper.rb
17
- lib/textutils/helper/hypertext_helper.rb
18
- lib/textutils/helper/tag_helper.rb
19
- lib/textutils/helper/title_helper.rb
20
- lib/textutils/helper/unicode_helper.rb
21
- lib/textutils/helper/value_helper_i.rb
22
- lib/textutils/helper/value_helper_ii.rb
23
- lib/textutils/helper/value_helper_iii_numbers.rb
24
- lib/textutils/helper/xml_helper.rb
25
- lib/textutils/page.rb
26
- lib/textutils/parser/name_parser.rb
27
- lib/textutils/parser/name_tokenizer.rb
28
- lib/textutils/patterns.rb
29
- lib/textutils/reader/block_reader.rb
30
- lib/textutils/reader/code_reader.rb
31
- lib/textutils/reader/fixture_reader.rb
32
- lib/textutils/reader/hash_reader.rb
33
- lib/textutils/reader/line_reader.rb
34
- lib/textutils/reader/tree_reader.rb
35
- lib/textutils/reader/values_reader.rb
36
- lib/textutils/sanitizier.rb
37
- lib/textutils/title.rb
38
- lib/textutils/title_mapper.rb
39
- lib/textutils/utils.rb
40
- lib/textutils/version.rb
41
- test/data/at-austria/1--n-niederoesterreich/orte.txt
42
- test/data/cl_all.txt
43
- test/data/de-deutschland/3--by-bayern/4--oberfranken/orte.txt
44
- test/data/de-deutschland/3--by-bayern/4--oberfranken/orte_ii.txt
45
- test/data/de-deutschland/orte.txt
46
- test/data/feedburner.txt
47
- test/helper.rb
48
- test/test_address_helper.rb
49
- test/test_asciify.rb
50
- test/test_block_reader.rb
51
- test/test_fixture_reader.rb
52
- test/test_hypertext_helper.rb
53
- test/test_slugify.rb
54
- test/test_taglist.rb
55
- test/test_title_finder.rb
56
- test/test_title_helper.rb
57
- test/test_title_mapper.rb
58
- test/test_tree_reader.rb
59
- test/test_tree_reader_ii.rb
60
- test/test_unicode_helper.rb
61
- test/test_values_reader.rb
1
+ HISTORY.md
2
+ Manifest.txt
3
+ README.md
4
+ Rakefile
5
+ lib/textutils.rb
6
+ lib/textutils/classifier.rb
7
+ lib/textutils/core_ext/array.rb
8
+ lib/textutils/core_ext/file.rb
9
+ lib/textutils/core_ext/time.rb
10
+ lib/textutils/filter/code_filter.rb
11
+ lib/textutils/filter/comment_filter.rb
12
+ lib/textutils/filter/erb_django_filter.rb
13
+ lib/textutils/filter/erb_filter.rb
14
+ lib/textutils/filter/string_filter.rb
15
+ lib/textutils/helper/address_helper.rb
16
+ lib/textutils/helper/date_helper.rb
17
+ lib/textutils/helper/hypertext_helper.rb
18
+ lib/textutils/helper/tag_helper.rb
19
+ lib/textutils/helper/title_helper.rb
20
+ lib/textutils/helper/unicode_helper.rb
21
+ lib/textutils/helper/value_helper_i.rb
22
+ lib/textutils/helper/value_helper_ii.rb
23
+ lib/textutils/helper/value_helper_iii_numbers.rb
24
+ lib/textutils/helper/xml_helper.rb
25
+ lib/textutils/page.rb
26
+ lib/textutils/parser/name_parser.rb
27
+ lib/textutils/parser/name_tokenizer.rb
28
+ lib/textutils/patterns.rb
29
+ lib/textutils/reader/block_reader.rb
30
+ lib/textutils/reader/code_reader.rb
31
+ lib/textutils/reader/fixture_reader.rb
32
+ lib/textutils/reader/hash_reader.rb
33
+ lib/textutils/reader/line_reader.rb
34
+ lib/textutils/reader/tree_reader.rb
35
+ lib/textutils/reader/values_reader.rb
36
+ lib/textutils/sanitizier.rb
37
+ lib/textutils/title.rb
38
+ lib/textutils/title_mapper.rb
39
+ lib/textutils/title_mapper2.rb
40
+ lib/textutils/utils.rb
41
+ lib/textutils/version.rb
42
+ test/data/at-austria/1--n-niederoesterreich/orte.txt
43
+ test/data/cl_all.txt
44
+ test/data/de-deutschland/3--by-bayern/4--oberfranken/orte.txt
45
+ test/data/de-deutschland/3--by-bayern/4--oberfranken/orte_ii.txt
46
+ test/data/de-deutschland/orte.txt
47
+ test/data/feedburner.txt
48
+ test/helper.rb
49
+ test/test_address_helper.rb
50
+ test/test_asciify.rb
51
+ test/test_block_reader.rb
52
+ test/test_fixture_reader.rb
53
+ test/test_hypertext_helper.rb
54
+ test/test_slugify.rb
55
+ test/test_taglist.rb
56
+ test/test_title_finder.rb
57
+ test/test_title_helper.rb
58
+ test/test_title_mapper.rb
59
+ test/test_title_mapper2.rb
60
+ test/test_tree_reader.rb
61
+ test/test_tree_reader_ii.rb
62
+ test/test_unicode_helper.rb
63
+ test/test_values_reader.rb
data/Rakefile CHANGED
@@ -21,7 +21,7 @@ Hoe.spec 'textutils' do
21
21
  ['props', '>=1.1.2'],
22
22
  ['logutils', '>=0.6.1'],
23
23
  ### 3rd party gems
24
- ['rubyzip'], ## todo/check: make optional -why? why not??
24
+ ['rubyzip', '>=1.0.0'], ## note: 1.0 changed to require zip (pre 1.0 was zip/zip); todo/check: make optional -why? why not??
25
25
  ['activesupport'] ## todo/check: really needed? document what methods get used
26
26
  ]
27
27
 
@@ -66,6 +66,7 @@ require 'textutils/reader/tree_reader'
66
66
  require 'textutils/classifier'
67
67
  require 'textutils/title' # title table/mapper/finder utils
68
68
  require 'textutils/title_mapper'
69
+ require 'textutils/title_mapper2'
69
70
 
70
71
  require 'textutils/page' # for book pages and page templates
71
72
 
@@ -0,0 +1,168 @@
# encoding: utf-8


## see textutils/title.rb
##   for existing code
##   move over here


module TextUtils

  ##
  # Maps known titles found in a line of text to record keys.
  #
  # A lookup table is built once from a list of records (each record must
  # respond to +key+, +title+ and +synonyms+; +code+ is optional).
  # map_titles! then replaces every known title in a line with a
  # @@oo{key}oo@@ marker, and find_key!/find_keys! pull the keys back out,
  # replacing each marker with an upper-cased [TAG] placeholder.
  #
  # todo/check: rename to NameMapper ? why? why not??
  class TitleMapper2

    include LogUtils::Logging

    ## lookup table (array of MappingStruct) sorted by title length, longest first
    ##   todo/check: rename to mapping or mappings or just titles - why? why not?
    attr_reader :known_titles

    ##
    ##  key:     e.g. augsburg
    ##  title:   e.g. FC Augsburg
    ##  length (of title - not pattern):  e.g. 11   -- do not count dots (e.g. U.S.A. => 3 or 6) why? why not?
    ##  pattern: regex source (string) used to find the title in a line
    ##
    ##  todo/check: use (rename to) TitleStruct - why? why not??
    MappingStruct = Struct.new(:key, :title, :length, :pattern)

    # records - list of records to build the lookup table from
    # tag     - tag name used for the placeholders e.g. brewery, team, etc.
    #           (todo: rename tag to attrib or attrib_name - why ?? why not ???)
    def initialize(records, tag)
      @known_titles = build_title_table_for(records)   ## build mapping lookup table
      @tag          = tag
    end

    # Replaces (in place) every known title in line with its @@oo{key}oo@@
    # marker; repeats until no title matches any more. Always returns nil.
    #
    # todo/check: rename to just map! - why?? why not???
    def map_titles!(line)
      loop do
        break unless map_title_for!(@tag, line, @known_titles)
      end
    end

    # Replaces (in place) the first marker in line with [TAG] and returns
    # its key (or nil if the line has no marker).
    def find_key!(line)
      find_key_for!(@tag, line)
    end

    # Same as find_key! but collects all keys; the placeholders get numbered
    # e.g. [TAG1], [TAG2], etc.  NB: keys (plural!) - will return array
    def find_keys!(line)
      keys    = []
      counter = 1

      while (key = find_key_for!("#{@tag}#{counter}", line)).present?
        keys << key
        counter += 1
      end

      keys
    end


    private

    # Builds the lookup table - one MappingStruct per title, synonym,
    # subtitle-less variant and (if defined) code - e.g.
    #
    #   [[ 'wolfsburg', 'VfL Wolfsburg'],
    #    [ 'augsburg',  'FC Augsburg'],
    #    [ 'augsburg',  'Augi2'],
    #    [ 'augsburg',  'Augi3' ],
    #    [ 'stuttgart', 'VfB Stuttgart']]
    #
    # Returns the table sorted by title length (longest goes first - best match).
    def build_title_table_for(records)
      table = []

      records.each_with_index do |rec, index|
        titles = title_variants_for(rec)

        titles.each do |title|
          ## note: escape for regex plus allow subs for special chars/accents
          table << MappingStruct.new(rec.key, title, title.length, TextUtils.title_esc_regex(title))
        end

        logger.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{titles.join('|')}<"

        ## NB: only include code field - if defined
        next unless rec.respond_to?(:code) && rec.code.present?

        ## note: the code gets matched literally (no variants allowed for now);
        ##   escape it so regex metachars in a code (e.g. a dot) cannot match
        ##   arbitrary text or break the regex
        table << MappingStruct.new(rec.key, rec.code, rec.code.length, Regexp.escape(rec.code.to_s))
      end

      ## note: sort here by length (largest goes first - best match);
      ##   the position is used as tie-breaker to keep the order of
      ##   same-length entries deterministic (Ruby's sort is not stable)
      table.each_with_index
           .sort_by { |mapping, position| [-mapping.length, position] }
           .map(&:first)
    end

    # Returns all titles for a record: the title itself, all synonyms
    # (separated by |) and - for titles with a subtitle in parentheses
    # e.g. Grand Prix Japan (Suzuka Circuit) - the title w/o the subtitle
    # e.g. Grand Prix Japan.
    def title_variants_for(rec)
      candidates  = [rec.title]
      candidates += rec.synonyms.split('|') if rec.synonyms.present?

      candidates.each_with_object([]) do |title, titles|
        titles << title
        next unless title =~ /\(.+\)/

        ## remove/delete subtitle; collapse the whitespace left behind by an
        ##   inline subtitle and strip leading and trailing whitespace too
        short_title = title.gsub(/\(.+\)/, '').gsub(/\s+/, ' ').strip

        ## note: skip if nothing is left (title is all subtitle) - an empty
        ##   pattern would match at any word boundary and make map_titles!
        ##   loop forever
        titles << short_title unless short_title.empty?
      end
    end

    # Replaces (in place) the first match of the first matching mapping with
    # its @@oo{key}oo@@ marker. Returns true on a match, false otherwise.
    def map_title_for!(tag, line, mappings)
      downcase_tag = tag.downcase

      mappings.each do |mapping|
        key   = mapping.key
        value = mapping.pattern

        ## nb: \b does NOT include space or newline for word boundary (only alphanums e.g. a-z0-9)
        ##   (thus add it, allows match for Benfica Lis. for example - note . at the end)
        ## check add $ e.g. (\b| |\t|$) does this work? - check w/ Benfica Lis.$
        regex = /\b#{value}(\b| |\t|$)/   # wrap with word boundary (e.g. match only whole words e.g. not wac in wacker)
        next unless line =~ regex

        logger.debug " match for #{downcase_tag} >#{key}< >#{value}<"
        # make sure @@oo{key}oo@@ doesn't match itself with other key e.g. wacker, wac, etc.
        #   note: block form - the key is inserted as is (no backslash escapes get interpreted)
        line.sub!(regex) { "@@oo#{key}oo@@ " }   # NB: add one space char at end
        return true    # break out after first match (do NOT continue)
      end

      false
    end

    # Replaces (in place) the first @@oo{key}oo@@ marker in line with the
    # upper-cased tag in brackets e.g. [TEAM]. Returns the key or nil if the
    # line has no marker.
    def find_key_for!(tag, line)
      regex = /@@oo([^@]+?)oo@@/   # e.g. everything in @@ .... @@ (use non-greedy +? plus all chars but not @, that is [^@])

      upcase_tag   = tag.upcase
      downcase_tag = tag.downcase

      match = regex.match(line)
      return nil unless match

      value = match[1]
      logger.debug " #{downcase_tag}: >#{value}<"

      line.sub!(regex, "[#{upcase_tag}]")

      value
    end # method find_key_for!

  end # class TitleMapper2
end # module TextUtils
@@ -3,8 +3,8 @@
3
3
  module TextUtils
4
4
 
5
5
  MAJOR = 1 ## todo: namespace inside version or something - why? why not??
6
- MINOR = 3
7
- PATCH = 1
6
+ MINOR = 4
7
+ PATCH = 0
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
@@ -0,0 +1,45 @@
# encoding: utf-8

###
#  to run use
#     ruby -I ./lib -I ./test test/test_title_mapper2.rb


require 'helper'


# Smoke test for TextUtils::TitleMapper2 - builds a title table from three
# clubs, maps the titles in a sample match line and reads back the keys.
class TestTitleMapper2 < Minitest::Test

  ## minimal record - responds to key, title and synonyms (separated by |)
  ClubStruct = Struct.new(:key, :title, :synonyms)

  def test_title_table
    clubs = [
      ['barcelona', 'Barcelona', 'FC Barcelona'],
      ['espanyol',  'Espanyol',  'RCD Espanyol|Espanyol Barcelona'],
      ['sevilla',   'Sevilla',   'Sevilla FC']
    ].map { |fields| ClubStruct.new(*fields) }

    mapper = TextUtils::TitleMapper2.new(clubs, 'club')

    ## dump the lookup table (for debugging)
    puts 'titles_out:'
    pp mapper.known_titles

    ## step 1: replace the club titles with key markers (changes line in place)
    line = "Espanyol Barcelona 1-0 FC Barcelona"
    mapper.map_titles!(line)
    puts "=> #{line}"

    ## step 2: read back the keys - one marker per call, left to right
    found = Array.new(2) { mapper.find_key!(line) }
    puts "=> #{line}"

    assert_equal 'espanyol',  found[0]
    assert_equal 'barcelona', found[1]

    assert true  ## assume everything ok if we get here
  end # method test_title_table

end # class TestTitleMapper2
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textutils
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.1
4
+ version: 1.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-06-15 00:00:00.000000000 Z
11
+ date: 2015-11-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: props
@@ -44,14 +44,14 @@ dependencies:
44
44
  requirements:
45
45
  - - ">="
46
46
  - !ruby/object:Gem::Version
47
- version: '0'
47
+ version: 1.0.0
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
- version: '0'
54
+ version: 1.0.0
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: activesupport
57
57
  requirement: !ruby/object:Gem::Requirement
@@ -86,14 +86,14 @@ dependencies:
86
86
  requirements:
87
87
  - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: '3.13'
89
+ version: '3.14'
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
- version: '3.13'
96
+ version: '3.14'
97
97
  description: textutils - Text Filters, Helpers, Readers and More
98
98
  email: ruby-talk@ruby-lang.org
99
99
  executables: []
@@ -103,7 +103,6 @@ extra_rdoc_files:
103
103
  - Manifest.txt
104
104
  - README.md
105
105
  files:
106
- - ".gemtest"
107
106
  - HISTORY.md
108
107
  - Manifest.txt
109
108
  - README.md
@@ -142,6 +141,7 @@ files:
142
141
  - lib/textutils/sanitizier.rb
143
142
  - lib/textutils/title.rb
144
143
  - lib/textutils/title_mapper.rb
144
+ - lib/textutils/title_mapper2.rb
145
145
  - lib/textutils/utils.rb
146
146
  - lib/textutils/version.rb
147
147
  - test/data/at-austria/1--n-niederoesterreich/orte.txt
@@ -161,6 +161,7 @@ files:
161
161
  - test/test_title_finder.rb
162
162
  - test/test_title_helper.rb
163
163
  - test/test_title_mapper.rb
164
+ - test/test_title_mapper2.rb
164
165
  - test/test_tree_reader.rb
165
166
  - test/test_tree_reader_ii.rb
166
167
  - test/test_unicode_helper.rb
@@ -187,22 +188,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
187
188
  version: '0'
188
189
  requirements: []
189
190
  rubyforge_project:
190
- rubygems_version: 2.4.2
191
+ rubygems_version: 2.2.3
191
192
  signing_key:
192
193
  specification_version: 4
193
194
  summary: textutils - Text Filters, Helpers, Readers and More
194
- test_files:
195
- - test/test_title_finder.rb
196
- - test/test_fixture_reader.rb
197
- - test/test_unicode_helper.rb
198
- - test/test_asciify.rb
199
- - test/test_tree_reader.rb
200
- - test/test_title_mapper.rb
201
- - test/test_values_reader.rb
202
- - test/test_taglist.rb
203
- - test/test_hypertext_helper.rb
204
- - test/test_title_helper.rb
205
- - test/test_slugify.rb
206
- - test/test_address_helper.rb
207
- - test/test_block_reader.rb
208
- - test/test_tree_reader_ii.rb
195
+ test_files: []
data/.gemtest DELETED
File without changes