textutils 0.5.7 → 0.5.8
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/textutils/reader/values_reader.rb +160 -12
- data/lib/textutils/version.rb +1 -1
- metadata +8 -8
@@ -12,8 +12,166 @@ class ValuesReader
|
|
12
12
|
@data = File.read_utf8( @path )
|
13
13
|
end
|
14
14
|
|
15
|
-
|
16
|
-
|
15
|
+
|
16
|
+
##########
|
17
|
+
# todo/fix:
|
18
|
+
# create a new ValuesReaderEx or ValuesReaderV2 or similar
|
19
|
+
# - handle tags (last entry, split up into entries)
|
20
|
+
# - handle key:value pairs (split up and return in ordered hash)
|
21
|
+
# and so on - lets us reuse code for tags and more
|
22
|
+
|
23
|
+
|
24
|
+
# Iterates over the records in @data and yields each one to the block as
# (attribs, more_cols); supports multi-line records.
#
# A record starts with a comma-separated line, e.g.
#
#   at, Austria|Oesterreich, europe
#
# and may be followed by any number of "key: value" lines (e.g. brands: ...).
# Each of those lines is added as ONE extra value to the record started
# before it (commas are NOT split).
#
# Yields:
#   attribs   - Hash with :key, :title and (if present) :synonyms,
#               merged with @more_values (e.g. country_id and other defaults)
#   more_cols - Array of the remaining column values; "key: value" lines are
#               added upfront so optional tags stay the last entry
#
# Returns an Enumerator if no block is given.
#
# todo/fix: do NOT allow mixed use of records with key and w/o key
#   in a single fixture file
def each_line
  return enum_for( :each_line ) unless block_given?

  inside_line = false   # true once the first record line got parsed
  attribs     = {}
  more_cols   = []

  @data.each_line do |line|

    ## allow alternative comment lines e.g. -- comment or % comment
    ##  why? # might get used by markdown for marking headers, for example
    ## NB: alternative comment styles are NOT allowed as end-of-line comments
    if line =~ /^\s*#/ || line =~ /^\s*--/ || line =~ /^\s*%/
      # skip comments and do NOT copy to result (keep comments secret!)
      logger.debug 'skipping comment line'
      next
    end

    if line =~ /^\s*$/
      # skip blank line
      logger.debug 'skipping blank line'
      next
    end

    # pass 1) remove possible trailing eol comment
    ##  e.g -> nyc, New York # Sample EOL Comment Here (with or without commas,,,,)
    ##  becomes -> nyc, New York
    # pass 2) remove leading and trailing whitespace
    line = line.sub( /\s+#.+$/, '' ).strip

    ### check for multi-line record value
    ##   must start with key and colon e.g. brands:
    ##   NB: the * quantifier is required; without it only keys with
    ##   exactly three chars would match (and brands: would not)
    if line =~ /^[a-z][a-z0-9.]*[a-z0-9]:/
      # NB: every additional line is one value e.g. city:wien, etc.
      #  allows you to use any chars
      logger.debug " multi-line record - add key-value >#{line}<"

      # add value upfront (first value); lets us keep (optional) tags as last entry
      # NB: values found before the first record get replaced once that record is parsed
      more_cols.unshift( line )
      next
    end

    ### guard escaped commas (e.g. \,)
    line = line.gsub( '\,', '@commma@' )

    ## use generic separator (allows us to configure separator)
    line = line.gsub( ',', '@sep@' )

    ## restore escaped commas (before split)
    line = line.gsub( '@commma@', ',' )

    logger.debug "line: >>#{line}<<"

    # pass 1) remove leading and trailing whitespace for values
    values = line.split( '@sep@' ).map { |value| value.strip }

    ##### todo remove support of comment column? (NB: must NOT include commas)
    # pass 2) remove comment columns (values starting with #)
    values = values.reject do |value|
      comment_col = value =~ /^#/
      logger.debug " removing column with value >>#{value}<<" if comment_col
      comment_col
    end

    logger.debug " values: >>#{values.join('<< >>')}<<"

    if values.empty?
      # e.g. line made up of separators only; nothing to build a record from
      logger.debug ' skipping line without values'
      next
    end

    # NB: a new record ends the previous (possibly multi-line) record;
    #  yield the previous record before starting the new one
    yield( attribs, more_cols ) if inside_line
    inside_line = true

    ### support autogenerate key from first title value
    #
    # if the first value looks like a key (only a-z lower case allowed); assume it's a key
    #  - also allow . in keys e.g. world.quali.america, at.cup, etc.
    #  - also allow 0-9 in keys e.g. at.2, at.3.1, etc.
    #  - NB: key must start w/ a-z letter (minimum one letter possible)
    #  - NB: a single value is always the title (a key needs a title following it)
    #
    # fix/todo: add support for leading underscore _
    #  or allow keys starting w/ digits?
    if values.size > 1 && values[0] =~ /^([a-z][a-z0-9.]*[a-z0-9]|[a-z])$/
      key_col   = values[0]
      title_col = values[1]
      more_cols = values[2..-1]
    else
      key_col   = '<auto>'
      title_col = values[0]
      more_cols = values[1..-1]
    end

    attribs = {}

    ## title (split off optional synonyms)
    #   e.g. FC Bayern Muenchen|Bayern Muenchen|Bayern
    titles = title_col.split('|')

    attribs[ :title ] = titles[0]

    ## add optional synonyms if present
    attribs[ :synonyms ] = titles[1..-1].join('|') if titles.size > 1

    if key_col == '<auto>'
      ## autogenerate key from first title
      key_col = title_to_key( titles[0] )
      logger.debug " autogen key >#{key_col}< from title >#{titles[0]}<, textutils version #{TextUtils::VERSION}"
    end

    attribs[ :key ] = key_col

    attribs = attribs.merge( @more_values )  # e.g. merge country_id and other defaults if present

  end # each lines

  # do NOT forget to yield last record (if present/processed)
  yield( attribs, more_cols ) if inside_line

end # method each_line
|
170
|
+
|
171
|
+
|
172
|
+
|
173
|
+
def each_line_old_single_line_records_only
|
174
|
+
|
17
175
|
@data.each_line do |line|
|
18
176
|
|
19
177
|
## allow alternative comment lines
|
@@ -149,19 +307,9 @@ class ValuesReader
|
|
149
307
|
## remove all whitespace and punctuation
|
150
308
|
key = key.gsub( /[ \t_\-\.()\[\]'"\/]/, '' )
|
151
309
|
|
152
|
-
logger.debug " before remove special chars >#{key}<"
|
153
|
-
|
154
310
|
## remove special chars (e.g. %°&)
|
155
311
|
key = key.gsub( /[%&°]/, '' )
|
156
312
|
|
157
|
-
logger.debug " after remove special chars - step 1 >#{key}<"
|
158
|
-
|
159
|
-
## remove &&&& - try again / why is it not working?
|
160
|
-
key = key.gsub( /&/, '' )
|
161
|
-
|
162
|
-
logger.debug " after remove special chars - step 2 >#{key}<"
|
163
|
-
|
164
|
-
|
165
313
|
## turn accented char into ascii look alike if possible
|
166
314
|
##
|
167
315
|
## todo: add some more
|
data/lib/textutils/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: textutils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.7
|
4
|
+
version: 0.5.8
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-05-
|
12
|
+
date: 2013-05-05 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: logutils
|
16
|
-
requirement: &
|
16
|
+
requirement: &73298570 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0.5'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *73298570
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rdoc
|
27
|
-
requirement: &
|
27
|
+
requirement: &73298350 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '3.10'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *73298350
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: hoe
|
38
|
-
requirement: &
|
38
|
+
requirement: &73298130 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,7 +43,7 @@ dependencies:
|
|
43
43
|
version: '3.3'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *73298130
|
47
47
|
description: textutils - Text Filters, Helpers, Readers and More
|
48
48
|
email: webslideshow@googlegroups.com
|
49
49
|
executables: []
|