textutils 1.2.4 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest.txt +6 -0
- data/lib/textutils.rb +3 -2
- data/lib/textutils/parser/name_parser.rb +38 -33
- data/lib/textutils/parser/name_tokenizer.rb +51 -0
- data/lib/textutils/reader/tree_reader.rb +96 -0
- data/lib/textutils/version.rb +2 -2
- data/test/data/de-deutschland/3--by-bayern/4--oberfranken/orte.txt +103 -0
- data/test/data/de-deutschland/3--by-bayern/4--oberfranken/orte_ii.txt +17 -0
- data/test/data/de-deutschland/orte.txt +12 -0
- data/test/test_tree_reader.rb +33 -0
- metadata +8 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cb4855c8da2e08a23a8c84f436058695ca79b5d2
|
4
|
+
data.tar.gz: 7373f05346e5939481ab569bc411d37b8d155a09
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 547e53097dcc8ca0bade46448b7fd26998cc4ff333b33142bddb0db6b8654e004d78498c01bdf27fb9c2b10578e8e5808f89e7aa632ec73ffa1f114dbf693979
|
7
|
+
data.tar.gz: d9d26f569c2ebd76766c6ae8d592e08310f5632c40a78dc80f04d2085b031551a7f61f21cca046bce8fcbd922584a273540346f9d26863b41d6093f956bf828e
|
data/Manifest.txt
CHANGED
@@ -24,12 +24,14 @@ lib/textutils/helper/value_helper_iii_numbers.rb
|
|
24
24
|
lib/textutils/helper/xml_helper.rb
|
25
25
|
lib/textutils/page.rb
|
26
26
|
lib/textutils/parser/name_parser.rb
|
27
|
+
lib/textutils/parser/name_tokenizer.rb
|
27
28
|
lib/textutils/patterns.rb
|
28
29
|
lib/textutils/reader/block_reader.rb
|
29
30
|
lib/textutils/reader/code_reader.rb
|
30
31
|
lib/textutils/reader/fixture_reader.rb
|
31
32
|
lib/textutils/reader/hash_reader.rb
|
32
33
|
lib/textutils/reader/line_reader.rb
|
34
|
+
lib/textutils/reader/tree_reader.rb
|
33
35
|
lib/textutils/reader/values_reader.rb
|
34
36
|
lib/textutils/sanitizier.rb
|
35
37
|
lib/textutils/title.rb
|
@@ -37,6 +39,9 @@ lib/textutils/title_mapper.rb
|
|
37
39
|
lib/textutils/utils.rb
|
38
40
|
lib/textutils/version.rb
|
39
41
|
test/data/cl_all.txt
|
42
|
+
test/data/de-deutschland/3--by-bayern/4--oberfranken/orte.txt
|
43
|
+
test/data/de-deutschland/3--by-bayern/4--oberfranken/orte_ii.txt
|
44
|
+
test/data/de-deutschland/orte.txt
|
40
45
|
test/data/feedburner.txt
|
41
46
|
test/helper.rb
|
42
47
|
test/test_address_helper.rb
|
@@ -49,5 +54,6 @@ test/test_taglist.rb
|
|
49
54
|
test/test_title_finder.rb
|
50
55
|
test/test_title_helper.rb
|
51
56
|
test/test_title_mapper.rb
|
57
|
+
test/test_tree_reader.rb
|
52
58
|
test/test_unicode_helper.rb
|
53
59
|
test/test_values_reader.rb
|
data/lib/textutils.rb
CHANGED
@@ -52,8 +52,8 @@ require 'textutils/core_ext/file'
|
|
52
52
|
require 'textutils/core_ext/time'
|
53
53
|
require 'textutils/core_ext/array'
|
54
54
|
|
55
|
-
|
56
55
|
require 'textutils/parser/name_parser'
|
56
|
+
require 'textutils/parser/name_tokenizer'
|
57
57
|
|
58
58
|
require 'textutils/reader/code_reader'
|
59
59
|
require 'textutils/reader/hash_reader'
|
@@ -61,6 +61,7 @@ require 'textutils/reader/line_reader'
|
|
61
61
|
require 'textutils/reader/values_reader'
|
62
62
|
require 'textutils/reader/fixture_reader'
|
63
63
|
require 'textutils/reader/block_reader'
|
64
|
+
require 'textutils/reader/tree_reader'
|
64
65
|
|
65
66
|
require 'textutils/classifier'
|
66
67
|
require 'textutils/title' # title table/mapper/finder utils
|
@@ -71,4 +72,4 @@ require 'textutils/page' # for book pages and page templates
|
|
71
72
|
|
72
73
|
|
73
74
|
# say hello
|
74
|
-
puts TextUtils.banner if
|
75
|
+
puts TextUtils.banner if defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG
|
@@ -2,60 +2,65 @@
|
|
2
2
|
|
3
3
|
# fix: move into TextUtils namespace/module!! ??
|
4
4
|
|
5
|
+
class NameParser
  ## Splits raw name chunks into a flat array of plain names.
  ##  e.g.
  ##   ['München [Munich]']                 => ['München', 'Munich']
  ##   ['Wr. Neustadt | Wiener Neustadt']   => ['Wr. Neustadt', 'Wiener Neustadt']

  include LogUtils::Logging

  ## chunks - array of strings; nil or blank entries get skipped
  ##
  ## Returns an array of names (strings) in order of appearance;
  ##  names enclosed in [] get returned without the brackets.
  def parse( chunks )
    ## todo/fix: (re)use nameparser - for now "simple" inline version
    ##  fix!!! - note: for now lang gets ignored
    ##  fix: add handling for
    ##    Leuven[nl]|Louvain[fr] Löwen[de]
    ##    Antwerpen[nl]|Anvers[fr] [Antwerp]
    ##    Brussel[nl]•Bruxelles[fr] -> official bi-lingual name
    ##    etc.

    ## values - split into names (name n lang pairs)
    ##   note: assumes (default) lang from more_attribs unless otherwise marked e.g. [] assume en etc.

    ## split chunks into values
    values = []
    chunks.each do |chunk|
      ## note: blank? is not core ruby - presumably provided by a core extension loaded elsewhere
      next if chunk.nil? || chunk.blank?   ## skip nil or empty/blank chunks

      chunk.split( '|' ).each do |part|   # 1) split by | (pipe)
        s = StringScanner.new( part )
        s.skip( /[ \t]+/ )   # skip whitespaces

        until s.eos?
          if s.check( /\[/ )
            ## scan everything until the end of bracket (e.g.])
            ##  note: the closing ] is optional in the pattern so that the scan
            ##   always consumes something; the stricter /\[[^\]]+\]/ returns nil
            ##   for [abc or [] (and the scanner would never advance)
            ##  fix!!! - note: for now lang gets ignored
            raw = s.scan( /\[[^\]]*\]?/ )
            if raw.end_with?( ']' )
              value = raw[1...-1]   # strip enclosing [] e.g. [Bavaria] => Bavaria
            else
              logger.warn( "[NameParser] missing closing ] in >#{part}<" )
              value = raw[1..-1]    # strip opening [ only
            end

            if value.empty?
              logger.warn( "[NameParser] skipping empty [] in >#{part}<" )
              value = nil
            end
          else
            ## scan everything until the begin of bracket (e.g.[)
            value = s.scan( /[^\[]+/ ).strip
          end
          values << value   if value

          s.skip( /[ \t]+/ )   # skip whitespaces
          logger.debug( "[NameParser] eos?: #{s.eos?}, rest: >#{s.rest}<" )
        end
      end
    end

    logger.debug( "[NameParser] values=#{values.inspect}" )

    names = []
    values.each do |value|
      ## todo: split by bullet ? (official multilang name) e.g. Brussel • Bruxelles
      ## todo: process variants w/ () e.g. Krems (a. d. Donau) etc. ??
      names << value
    end

    logger.debug( "[NameParser] names=#{names.inspect}" )

    names
  end # method parse
end # class NameParser
|
61
|
-
|
66
|
+
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# fix: move into TextUtils namespace/module!! ??
|
4
|
+
|
5
|
+
|
6
|
+
class NameTokenizer   ## - rename to NameScanner, NameSplitter, NameSeparator, etc.

  ## split (single) string value into array of names
  ##  e.g.
  ##   'München [Munich]'                => ['München', '[Munich]']
  ##   'Wr. Neustadt | Wiener Neustadt'  => ['Wr. Neustadt', 'Wiener Neustadt']
  include LogUtils::Logging

  ## value - string holding one or more names separated by | (pipe)
  ##          and/or followed by "inline" translations in []
  ##
  ## Returns an array of names (strings); names in brackets keep their brackets.
  ##  Returns an empty array for nil.
  def tokenize( value )   ## rename to/use split - why? why not??
    names = []
    return names   if value.nil?

    # 1) split by | (pipe) -- remove leading n trailing whitespaces
    parts = value.split( /[ \t]*\|[ \t]*/ )

    # 2) split "inline" translations e.g. München [Munich]

    ## todo: add support for Munich [en] e.g. trailing lang tag
    ## todo: add support for bullet (official bi-lingual names w/ tags ??) - see brussels - why, why not??

    parts.each do |part|
      s = StringScanner.new( part )
      s.skip( /[ \t]+/ )   # skip whitespaces

      until s.eos?
        if s.check( /\[/ )
          ## scan everything until the end of bracket (e.g.])
          ##  note: the closing ] is optional in the pattern so that the scan
          ##   always consumes something; with the stricter /\[[^\]]+\]/
          ##   a value such as [abc or [] returns nil, the scanner never advances
          ##   and the loop never ends
          name = s.scan( /\[[^\]]*\]?/ )
          logger.warn( "[NameTokenizer] missing closing ] in >#{part}<" )   unless name.end_with?( ']' )

          if name[1..-1].chomp( ']' ).empty?   ## nothing between the brackets e.g. [] or [
            logger.warn( "[NameTokenizer] skipping empty [] in >#{part}<" )
            name = nil
          end
        else
          ## scan everything until the begin of bracket (e.g.[)
          name = s.scan( /[^\[]+/ ).rstrip   ## remove trailing spaces (if present)
        end
        names << name   if name

        s.skip( /[ \t]+/ )   # skip whitespaces
        logger.debug( "[NameTokenizer] eos?: #{s.eos?}, rest: >#{s.rest}<" )
      end
    end # each part

    logger.debug( "[NameTokenizer] names=#{names.inspect}" )
    names
  end # method tokenize
end # class NameTokenizer
|
51
|
+
|
@@ -0,0 +1,96 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# fix: move into TextUtils namespace/module!!
|
4
|
+
|
5
|
+
class TreeReader
  ## Reads an outline where every line is made up of
  ##   an (optional) key, an (optional) level marker (dots) and a value
  ##  e.g.
  ##   2   Bayern
  ##   24  .. Oberfranken
  ##   241 .... Bamberg (Stadt)
  ##       ...... Bamberg

  include LogUtils::Logging

  def self.from_file( path )
    ## nb: assume/enforce utf-8 encoding (with or without BOM - byte order mark)
    ## - see textutils/utils.rb
    text = File.read_utf8( path )
    self.from_string( text )
  end

  def self.from_string( text )
    self.new( text )
  end

  def initialize( text )
    @text = text
  end

  TreeItem = Struct.new( :level, :key, :value )

  ## note: the key must end at a whitespace, a level dot or the end of the line;
  ##   without the lookahead a key-less line such as Bayern
  ##   gets split into key B and value ayern
  KEY_REGEX = /
                (?:
                    ([0-9][0-9A-Za-z]*)   ## key starting with a number
                  |
                    ([a-z]+)              ## key all lowercase e.g. bt,n,etc.
                  |
                    ([A-Z]+)              ## key all uppercase e.g. BT,N,etc
                )
                (?=[\s.]|\z)              ## key ends here
              /x

  LEVEL_REGEX = /\.+/   ## e.g. .. or .... etc.


  ## Yields the path (array of TreeItem) from the top level item down to
  ##  the current line's item - once for every line handed out by LineReader.
  ##
  ## note: the very same array gets (re)used for every yield;
  ##        use dup if you want to keep a path around
  def each_line
    stack = []   # path of tree items; note: last item => item of the line before
    times = 2    # assume two indents factor (e.g. .. =2, ....=3 etc. ) for now

    reader = LineReader.from_string( @text )
    reader.each_line do |line|

      logger.debug "[TreeReader] line (before) => >#{line}<"

      s = StringScanner.new( line )
      s.skip( /[ \t]+/ )   # remove whitespace

      key = s.scan( KEY_REGEX )
      s.skip( /[ \t]+/ )   if key   # remove whitespace

      level_str = s.scan( LEVEL_REGEX )
      if level_str
        ## note: integer division rounds down e.g. ... (3 dots) counts like .. (2 dots)
        if level_str.size % times != 0
          logger.warn "[TreeReader] level marker >#{level_str}< is not a multiple of #{times} dots in line >#{line}<"
        end
        level = (level_str.size/times)+1
        s.skip( /[ \t]+/ )   # remove whitespace
      else
        level = 1   ## no level found; assume top level (start w/ 1)
      end

      ## assume rest is record
      rest = s.rest   ## was: s.scan( /.+/ )

      last_level = stack.empty? ? 0 : stack.last.level
      level_diff = level - last_level

      if level_diff > 0
        logger.debug "[TreeReader]    up  +#{level_diff}"
        if level_diff > 1
          logger.warn "[TreeReader] level jumps from #{last_level} to #{level} in line >#{line}<; expected +1"
        end
      elsif level_diff < 0
        logger.debug "[TreeReader]    down #{level_diff}"
      end

      ## drop everything that is not an ancestor of the new item
      ##  (that is, the sibling on the same level and all items below);
      ##  note: compares item levels (not the stack size)
      ##   to stay correct even if a level got skipped
      stack.pop   while !stack.empty? && stack.last.level >= level

      stack.push( TreeItem.new( level, key, rest ) )

      ## for debugging - show tree item (note) hierarchy
      names = stack.map { |it| "(#{it.level}) #{it.value}" }
      logger.debug "[TreeReader] #{names.join( ' › ' )} -- key: >#{key}<, level: >#{level}<, rest: >#{rest}<"

      yield( stack )
    end
  end # method each_line

end # class TreeReader
|
96
|
+
|
data/lib/textutils/version.rb
CHANGED
@@ -0,0 +1,103 @@
|
|
1
|
+
2 Bayern
|
2
|
+
24 .. Oberfranken
|
3
|
+
241 .... Bamberg (Stadt) ## Kreisfreie Stadt
|
4
|
+
...... Bamberg
|
5
|
+
........ Bamberg
|
6
|
+
242 .... Bayreuth (Stadt) ## Kreisfreie Stadt
|
7
|
+
...... Bayreuth
|
8
|
+
........ Bayreuth
|
9
|
+
243 .... Coburg (Stadt) ## Kreisfreie Stadt
|
10
|
+
...... Coburg
|
11
|
+
........ Coburg
|
12
|
+
244 .... Hof (Stadt) ## Kreisfreie Stadt
|
13
|
+
...... Hof
|
14
|
+
........ Hof
|
15
|
+
|
16
|
+
245 .... Bamberg (Land) ## Landkreis -- 36 Gemeinden; see de.wikipedia.org/wiki/Landkreis_Bamberg
|
17
|
+
## 4 Städte
|
18
|
+
...... Baunach ## (4013, 30,9 km²)
|
19
|
+
........ Baunach
|
20
|
+
...... Hallstadt ## (8364, 14,5 km²)
|
21
|
+
........ Hallstadt ## (7588)
|
22
|
+
........ Dörfleins ## (1380)
|
23
|
+
...... Scheßlitz ## (7184, 94,9 km²)
|
24
|
+
........ Scheßlitz
|
25
|
+
........ Köttensdorf
|
26
|
+
........ Würgau
|
27
|
+
...... Schlüsselfeld ## (5712, 70,2 km²)
|
28
|
+
|
29
|
+
## 8 Märkte
|
30
|
+
...... Burgebrach ## (6553, 87,9 km²)
|
31
|
+
...... Burgwindheim ## (1311, 37,4 km²)
|
32
|
+
...... Buttenheim ## (3472, 30 km²)
|
33
|
+
...... Ebrach ## (1830, 29,6 km²)
|
34
|
+
...... Heiligenstadt i. OFr. ## (3525, 76,7 km²)
|
35
|
+
........ Heiligenstadt i. OFr.
|
36
|
+
........ Oberleinleiter
|
37
|
+
...... Hirschaid ## (11.919, 41 km²)
|
38
|
+
...... Rattelsdorf ## (4568, 39,6 km²)
|
39
|
+
........ Rattelsdorf
|
40
|
+
........ Mürsbach
|
41
|
+
........ Freudeneck
|
42
|
+
........ Höfen
|
43
|
+
........ Ebing
|
44
|
+
...... Zapfendorf ## (4954, 30,6 km²)
|
45
|
+
|
46
|
+
## 24 Gemeinden
|
47
|
+
...... Altendorf ## (2012, 8,6 km²)
|
48
|
+
...... Bischberg ## (6012, 17,5 km²)
|
49
|
+
...... Breitengüßbach ## (4586, 16,9 km²)
|
50
|
+
........ Breitengüßbach
|
51
|
+
...... Frensdorf ## (4865, 44 km²)
|
52
|
+
...... Gerach ## (946, 7,8 km²)
|
53
|
+
...... Gundelsheim ## (3378, 3,8 km²)
|
54
|
+
...... Kemmern ## (2544, 8,3 km²)
|
55
|
+
........ Kemmern
|
56
|
+
...... Königsfeld ## (1335, 42,7 km²)
|
57
|
+
........ Königsfeld
|
58
|
+
........ Huppendorf
|
59
|
+
...... Lauter ## (1139, 12,8 km²)
|
60
|
+
........ Lauter ## (601)
|
61
|
+
........ Appendorf ## (213)
|
62
|
+
...... Lisberg ## (1813, 8,4 km²)
|
63
|
+
...... Litzendorf ## (6057, 25,9 km²)
|
64
|
+
........ Litzendorf
|
65
|
+
........ Schammelsdorf
|
66
|
+
........ Melkendorf
|
67
|
+
........ Lohndorf
|
68
|
+
........ Tiefenellern
|
69
|
+
...... Memmelsdorf ## (8854, 26,2 km²)
|
70
|
+
........ Memmelsdorf
|
71
|
+
........ Merkendorf
|
72
|
+
........ Drosendorf
|
73
|
+
...... Oberhaid ## (4590, 27,2 km²)
|
74
|
+
........ Oberhaid
|
75
|
+
........ Staffelbach
|
76
|
+
...... Pettstadt ## (1940, 9,9 km²)
|
77
|
+
...... Pommersfelden ## (2851, 35,7 km²)
|
78
|
+
...... Priesendorf ## (1470, 8,4 km²)
|
79
|
+
...... Reckendorf ## (2033, 13,1 km²)
|
80
|
+
........ Reckendorf
|
81
|
+
...... Schönbrunn im Steigerwald ## (1880, 24,7 km²)
|
82
|
+
...... Stadelhofen ## (1250, 41 km²)
|
83
|
+
........ Stadelhofen
|
84
|
+
........ Steinfeld
|
85
|
+
........ Schederndorf
|
86
|
+
...... Stegaurach ## (6842, 23,9 km²)
|
87
|
+
...... Strullendorf ## (7807, 31,7 km²)
|
88
|
+
........ Strullendorf
|
89
|
+
........ Geisfeld
|
90
|
+
........ Roßdorf am Forst
|
91
|
+
...... Viereth-Trunstadt ## (3562, 15,8 km²)
|
92
|
+
...... Walsdorf ## (2575, 16,2 km²)
|
93
|
+
...... Wattendorf ## (679, 22,2 km²)
|
94
|
+
........ Wattendorf
|
95
|
+
|
96
|
+
246 .... Bayreuth (Land) ## Landkreis
|
97
|
+
247 .... Coburg (Land) ## Landkreis
|
98
|
+
248 .... Forchheim
|
99
|
+
249 .... Hof (Land) ## Landkreis
|
100
|
+
24A .... Kronach
|
101
|
+
24B .... Kulmbach
|
102
|
+
24C .... Lichtenfels
|
103
|
+
24D .... Wunsiedel i. Fichtelgebirge
|
@@ -0,0 +1,17 @@
|
|
1
|
+
2 Bayern
|
2
|
+
24 .. Oberfranken
|
3
|
+
241 .... Bamberg (Stadt) ## Kreisfreie Stadt
|
4
|
+
...... Bamberg
|
5
|
+
........ Bamberg
|
6
|
+
242 .... Bayreuth (Stadt) ## Kreisfreie Stadt
|
7
|
+
...... Bayreuth
|
8
|
+
........ Bayreuth
|
9
|
+
|
10
|
+
245 .... Bamberg (Land) ## Landkreis -- 36 Gemeinden; see de.wikipedia.org/wiki/Landkreis_Bamberg
|
11
|
+
## 4 Städte
|
12
|
+
...... Baunach ## (4013, 30,9 km²)
|
13
|
+
........ Baunach
|
14
|
+
...... Hallstadt ## (8364, 14,5 km²)
|
15
|
+
........ Hallstadt ## (7588)
|
16
|
+
........ Dörfleins ## (1380)
|
17
|
+
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_tree_reader.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
## note: uses Minitest::Test (not MiniTest::Test);
##   the MiniTest alias is no longer defined by default in newer minitest versions
class TestTreeReader < Minitest::Test

  def test_oberfranken
    assert_tree_readable( 'test/data/de-deutschland/3--by-bayern/4--oberfranken/orte.txt' )
  end

  def test_de
    assert_tree_readable( 'test/data/de-deutschland/orte.txt' )
  end

private

  ## reads the tree file (path relative to TextUtils.root) line by line;
  ##  passes if the whole file gets read without raising
  def assert_tree_readable( path )
    reader = TreeReader.from_file( "#{TextUtils.root}/#{path}" )

    reader.each_line do |stack|
      refute_empty stack   ## the path always includes (at least) the current item
    end

    assert true   ## assume everything ok if we get here
  end

end # class TestTreeReader
|
33
|
+
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: textutils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
@@ -130,12 +130,14 @@ files:
|
|
130
130
|
- lib/textutils/helper/xml_helper.rb
|
131
131
|
- lib/textutils/page.rb
|
132
132
|
- lib/textutils/parser/name_parser.rb
|
133
|
+
- lib/textutils/parser/name_tokenizer.rb
|
133
134
|
- lib/textutils/patterns.rb
|
134
135
|
- lib/textutils/reader/block_reader.rb
|
135
136
|
- lib/textutils/reader/code_reader.rb
|
136
137
|
- lib/textutils/reader/fixture_reader.rb
|
137
138
|
- lib/textutils/reader/hash_reader.rb
|
138
139
|
- lib/textutils/reader/line_reader.rb
|
140
|
+
- lib/textutils/reader/tree_reader.rb
|
139
141
|
- lib/textutils/reader/values_reader.rb
|
140
142
|
- lib/textutils/sanitizier.rb
|
141
143
|
- lib/textutils/title.rb
|
@@ -143,6 +145,9 @@ files:
|
|
143
145
|
- lib/textutils/utils.rb
|
144
146
|
- lib/textutils/version.rb
|
145
147
|
- test/data/cl_all.txt
|
148
|
+
- test/data/de-deutschland/3--by-bayern/4--oberfranken/orte.txt
|
149
|
+
- test/data/de-deutschland/3--by-bayern/4--oberfranken/orte_ii.txt
|
150
|
+
- test/data/de-deutschland/orte.txt
|
146
151
|
- test/data/feedburner.txt
|
147
152
|
- test/helper.rb
|
148
153
|
- test/test_address_helper.rb
|
@@ -155,6 +160,7 @@ files:
|
|
155
160
|
- test/test_title_finder.rb
|
156
161
|
- test/test_title_helper.rb
|
157
162
|
- test/test_title_mapper.rb
|
163
|
+
- test/test_tree_reader.rb
|
158
164
|
- test/test_unicode_helper.rb
|
159
165
|
- test/test_values_reader.rb
|
160
166
|
homepage: https://github.com/textkit/textutils
|
@@ -188,6 +194,7 @@ test_files:
|
|
188
194
|
- test/test_fixture_reader.rb
|
189
195
|
- test/test_unicode_helper.rb
|
190
196
|
- test/test_asciify.rb
|
197
|
+
- test/test_tree_reader.rb
|
191
198
|
- test/test_title_mapper.rb
|
192
199
|
- test/test_values_reader.rb
|
193
200
|
- test/test_taglist.rb
|