textutils 0.5.7 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/textutils/reader/values_reader.rb +160 -12
- data/lib/textutils/version.rb +1 -1
- metadata +8 -8
@@ -12,8 +12,166 @@ class ValuesReader
|
|
12
12
|
@data = File.read_utf8( @path )
|
13
13
|
end
|
14
14
|
|
15
|
-
|
16
|
-
|
15
|
+
|
16
|
+
##########
|
17
|
+
# todo/fix:
|
18
|
+
# create a new ValuesReaderEx or ValuesReaderV2 or similar
|
19
|
+
# - handle tags (last entry, split up into entries)
|
20
|
+
# - handle key:value pairs (split up and return in ordered hash)
|
21
|
+
# and so on - lets us reuse code for tags and more
|
22
|
+
|
23
|
+
|
24
|
+
# Iterates over the records in @data; supports multi-line records.
#
# A record starts on a line of comma-separated values
#
#   [key,] title[|synonym|...], value, ...
#
# and may be followed by any number of "key: value" lines. Each of those
# lines is added as-is (one value per line) to the FRONT of the record's
# extra values, so that (optional) tags stay the last entry.
#
# Skipped input:
#   - lines starting (after optional whitespace) with #, -- or %
#   - blank lines
#   - a trailing end-of-line comment (whitespace followed by # ...)
#   - columns whose value starts with #
#
# Yields once per record:
#   attribs   - Hash with :title, :key and (if present) :synonyms,
#               merged with @more_values
#   more_cols - Array with the remaining values of the record
#
# NOTE(review): a record line that has a key but no title column leaves
#   title_col nil and title_col.split raises; unchanged here - confirm
#   whether such lines should be skipped or reported.
#
# todo/fix:
#  create a new ValuesReaderEx or ValuesReaderV2 or similar
#   - handle tags (last entry, split up into entries)
#   - handle key:value pairs (split up and return in ordered hash)
#  and so on - lets us reuse code for tags and more
def each_line # support multi line records

  inside_line = false   # true once the first record line was parsed; todo: find a better name? e.g. line_found?
  attribs     = {}      # attributes of the record collected so far; rename to new_attributes?
  more_cols   = []      # extra values of the record collected so far; rename to more_values?

  @data.each_line do |line|

    ## allow alternative comment lines
    ##   e.g. -- comment or
    ##        % comment
    ##  why?  # might get used by markdown for marking headers, for example
    ## NB: for now alternative comment lines are not allowed as end-of-line style e.g.
    ##   some data, more data -- comment here
    if line =~ /^\s*#/ || line =~ /^\s*--/ || line =~ /^\s*%/
      # skip comments and do NOT copy to result (keep comments secret!)
      logger.debug 'skipping comment line'
      next
    end

    if line =~ /^\s*$/
      # skip blank line
      logger.debug 'skipping blank line'
      next
    end

    # pass 1) remove possible trailing eol comment
    ##  e.g    -> nyc, New York   # Sample EOL Comment Here (with or without commas,,,,)
    ##  becomes -> nyc, New York
    line = line.sub( /\s+#.+$/, '' )

    # pass 2) remove leading and trailing whitespace
    line = line.strip

    ### check for multi-line record
    ##  must start with key and colon e.g. brands:
    ##  NB: the key may have any length of two or more chars; the former
    ##      pattern had no quantifier and matched three-char keys only
    ##      (that is, brands: or city: got parsed as a new record)
    if line =~ /^[a-z][a-z0-9.]*[a-z0-9]:/
      # NB: every additional line is one value e.g. city:wien, etc.
      #  allows you to use any chars (commas are NOT split here)
      logger.debug " multi-line record - add key-value >#{line}<"

      more_cols.unshift( line.dup )   # add value upfront to array (first value); lets us keep (optional) tags as last entry; fix!! see valuereaderEx v2
      next
    else
      # NB: new record clears/ends multi-line record
      if inside_line   # already processed a record line? if yes; yield it before starting the next one
        yield( attribs, more_cols )
        attribs   = {}
        more_cols = []
      end
      inside_line = true
    end

    ### guard escaped commas (e.g. \,)
    line = line.gsub( '\,', '@commma@' )

    ## use generic separator (allow us to configure separator)
    line = line.gsub( ',', '@sep@')

    ## restore escaped commas (before split)
    line = line.gsub( '@commma@', ',' )

    logger.debug "line: >>#{line}<<"

    values = line.split( '@sep@' )

    # pass 1) remove leading and trailing whitespace for values
    values = values.map { |value| value.strip }

    ##### todo remove support of comment column? (NB: must NOT include commas)
    # pass 2) remove comment columns
    values = values.select do |value|
      if value =~ /^#/  ## starts with # - treat it as a comment column; e.g. remove it
        logger.debug " removing column with value >>#{value}<<"
        false
      else
        true
      end
    end

    logger.debug " values: >>#{values.join('<< >>')}<<"

    ### todo/fix: allow check - do NOT allow mixed use of with key and w/o key
    ##  either use keys or do NOT use keys; do NOT mix in a single fixture file

    ### support autogenerate key from first title value

    # if it looks like a key (only a-z lower case allowed); assume it's a key
    #   - also allow . in keys e.g. world.quali.america, at.cup, etc.
    #   - also allow 0-9 in keys e.g. at.2, at.3.1, etc.

    # fix/todo: add support for leading underscore _
    #   or allow keys starting w/ digits?
    if values[0] =~ /^([a-z][a-z0-9.]*[a-z0-9]|[a-z])$/   # NB: key must start w/ a-z letter (NB: minimum one letter possible)
      key_col   = values[0]
      title_col = values[1]
      more_cols = values[2..-1]
    else
      key_col   = '<auto>'
      title_col = values[0]
      more_cols = values[1..-1]
    end

    attribs = {}

    ## title (split of optional synonyms)
    # e.g. FC Bayern Muenchen|Bayern Muenchen|Bayern
    titles = title_col.split('|')

    attribs[ :title ] = titles[0]

    ## add optional synonyms if present
    attribs[ :synonyms ] = titles[1..-1].join('|') if titles.size > 1

    if key_col == '<auto>'
      ## autogenerate key from first title
      key_col = title_to_key( titles[0] )
      logger.debug " autogen key >#{key_col}< from title >#{titles[0]}<, textutils version #{TextUtils::VERSION}"
    end

    attribs[ :key ] = key_col

    attribs = attribs.merge( @more_values )  # e.g. merge country_id and other defaults if present

  end # each lines

  # do NOT forget to yield last line (if present/processed)
  if inside_line
    yield( attribs, more_cols )
  end

end # method each_line
|
170
|
+
|
171
|
+
|
172
|
+
|
173
|
+
def each_line_old_single_line_records_only
|
174
|
+
|
17
175
|
@data.each_line do |line|
|
18
176
|
|
19
177
|
## allow alternative comment lines
|
@@ -149,19 +307,9 @@ class ValuesReader
|
|
149
307
|
## remove all whitespace and punctuation
|
150
308
|
key = key.gsub( /[ \t_\-\.()\[\]'"\/]/, '' )
|
151
309
|
|
152
|
-
logger.debug " before remove special chars >#{key}<"
|
153
|
-
|
154
310
|
## remove special chars (e.g. %°&)
|
155
311
|
key = key.gsub( /[%&°]/, '' )
|
156
312
|
|
157
|
-
logger.debug " after remove special chars - step 1 >#{key}<"
|
158
|
-
|
159
|
-
## remove &&&& - try again / why is it not working?
|
160
|
-
key = key.gsub( /&/, '' )
|
161
|
-
|
162
|
-
logger.debug " after remove special chars - step 2 >#{key}<"
|
163
|
-
|
164
|
-
|
165
313
|
## turn accented char into ascii look alike if possible
|
166
314
|
##
|
167
315
|
## todo: add some more
|
data/lib/textutils/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: textutils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.7
|
4
|
+
version: 0.5.8
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-05-
|
12
|
+
date: 2013-05-05 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: logutils
|
16
|
-
requirement: &
|
16
|
+
requirement: &73298570 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0.5'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *73298570
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rdoc
|
27
|
-
requirement: &
|
27
|
+
requirement: &73298350 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '3.10'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *73298350
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: hoe
|
38
|
-
requirement: &
|
38
|
+
requirement: &73298130 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,7 +43,7 @@ dependencies:
|
|
43
43
|
version: '3.3'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *73298130
|
47
47
|
description: textutils - Text Filters, Helpers, Readers and More
|
48
48
|
email: webslideshow@googlegroups.com
|
49
49
|
executables: []
|