textutils 0.5.7 → 0.5.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -12,8 +12,166 @@ class ValuesReader
12
12
  @data = File.read_utf8( @path )
13
13
  end
14
14
 
15
- def each_line
16
-
15
+
16
+ ##########
17
+ # todo/fix:
18
+ # create a new ValuesReaderEx or ValuesReaderV2 or similar
19
+ # - handle tags (last entry, split up into entries)
20
+ # - handle key:value pairs (split up and return in ordered hash)
21
+ # and so on - lets us reuse code for tags and more
22
+
23
+
24
##
# Iterates over all records found in @data and yields each one to the block.
#
# Supports multi-line records: a record starts with a regular
# comma-separated line, e.g.
#
#   at, Austria|Oesterreich, europe
#
# and may be followed by any number of key-value lines, e.g.
#
#   brands: Ottakringer
#   city:wien
#
# Every key-value line is added (unsplit) to the FRONT of the record's
# extra values; lets us keep (optional) tags as the last entry.
#
# Yields two arguments per record:
#   attribs   - Hash with :title, :key and (if present) :synonyms,
#               merged with @more_values (e.g. country_id and other defaults)
#   more_cols - Array of all remaining values (strings)
#
# A record gets yielded once the next record starts (or the data ends),
# because only then we know that no more key-value lines follow.
#
# NB: key-value lines found before the first record get dropped.
#
# todo/fix:
#  create a new ValuesReaderEx or ValuesReaderV2 or similar
#   - handle tags (last entry, split up into entries)
#   - handle key:value pairs (split up and return in ordered hash)
def each_line # support multi line records
  inside_line = false   # true once a first record line got parsed; todo: find a better name? e.g. line_found?
  attribs     = {}      # attributes of the pending (not yet yielded) record
  more_cols   = []      # extra values of the pending (not yet yielded) record

  @data.each_line do |line|

    ## allow alternative comment lines
    ##  e.g. -- comment or
    ##       % comment
    ##  why?  # might get used by markdown for marking headers, for example
    ##
    ## NB: for now alternative comment lines not allowed as end of line style e.g
    ##   some data, more data   -- comment here
    if line =~ /^\s*#/ || line =~ /^\s*--/ || line =~ /^\s*%/
      # skip comments and do NOT copy to result (keep comments secret!)
      logger.debug 'skipping comment line'
      next
    end

    if line =~ /^\s*$/
      # skip blank line
      logger.debug 'skipping blank line'
      next
    end

    # pass 1) remove possible trailing eol comment
    ##  e.g    -> nyc, New York   # Sample EOL Comment Here (with or without commas,,,,)
    ##  becomes -> nyc, New York
    line = line.sub( /\s+#.+$/, '' )

    # pass 2) remove leading and trailing whitespace
    line = line.strip

    ### check for multi-line record
    ##   must start with key and colon e.g. brands:
    ##   NB: key may have any length (two or more chars);
    ##       was limited to exactly three chars by mistake (missing *)
    if line =~ /^[a-z][a-z0-9.]*[a-z0-9]:/
      # NB: every additional line is one value e.g. city:wien, etc.
      #   allows you to use any chars
      logger.debug " multi-line record - add key-value >#{line}<"

      more_cols.unshift( line.dup ) # add value upfront to array (first value)
      next
    end

    ### guard escaped commas (e.g. \,)
    line = line.gsub( '\,', '@commma@' )

    ## use generic separator (allow us to configure separator)
    line = line.gsub( ',', '@sep@' )

    ## restore escaped commas (before split)
    line = line.gsub( '@commma@', ',' )

    logger.debug "line: >>#{line}<<"

    # split and remove leading and trailing whitespace for values
    values = line.split( '@sep@' ).map { |value| value.strip }

    ##### todo remove support of comment column? (NB: must NOT include commas)
    # remove comment columns (values starting with #)
    values = values.reject do |value|
      comment_col = value =~ /^#/
      logger.debug " removing column with value >>#{value}<<" if comment_col
      comment_col
    end

    logger.debug " values: >>#{values.join('<< >>')}<<"

    # nothing left to build a record from (e.g. line with separators only);
    #  skip line (and keep pending record open) instead of failing on nil
    if values.empty?
      logger.debug ' skipping line without values'
      next
    end

    # NB: new record clears/ends multi-line record;
    #   if we already processed a record line - yield it now
    yield( attribs, more_cols ) if inside_line
    inside_line = true

    ### todo/fix: allow check - do NOT allow mixed use of with key and w/o key
    ##   either use keys or do NOT use keys; do NOT mix in a single fixture file

    ### support autogenerate key from first title value

    # if it looks like a key (only a-z lower case allowed); assume it's a key
    #  - also allow . in keys e.g. world.quali.america, at.cup, etc.
    #  - also allow 0-9 in keys e.g. at.2, at.3.1, etc.
    #  NB: key must start w/ a-z letter (NB: minimum one letter possible)
    #  NB: a single value is always the title (a record w/o title is not possible)
    #
    # fix/todo: add support for leading underscore _
    #   or allow keys starting w/ digits?
    if values.size > 1 && values[0] =~ /^([a-z][a-z0-9.]*[a-z0-9]|[a-z])$/
      key_col   = values[0]
      title_col = values[1]
      more_cols = values[2..-1]
    else
      key_col   = nil # nil => autogenerate key from title (see below)
      title_col = values[0]
      more_cols = values[1..-1]
    end

    attribs = {}

    ## title (split of optional synonyms)
    #   e.g. FC Bayern Muenchen|Bayern Muenchen|Bayern
    titles = title_col.split('|')

    attribs[ :title ] = titles[0]

    ## add optional synonyms if present
    attribs[ :synonyms ] = titles[1..-1].join('|') if titles.size > 1

    if key_col.nil?
      ## autogenerate key from first title
      key_col = title_to_key( titles[0] )
      logger.debug " autogen key >#{key_col}< from title >#{titles[0]}<, textutils version #{TextUtils::VERSION}"
    end

    attribs[ :key ] = key_col

    attribs = attribs.merge( @more_values ) # e.g. merge country_id and other defaults if present

  end # each lines

  # do NOT forget to yield last record (if present/processed)
  yield( attribs, more_cols ) if inside_line

end # method each_line
170
+
171
+
172
+
173
+ def each_line_old_single_line_records_only
174
+
17
175
  @data.each_line do |line|
18
176
 
19
177
  ## allow alternative comment lines
@@ -149,19 +307,9 @@ class ValuesReader
149
307
  ## remove all whitespace and punctuation
150
308
  key = key.gsub( /[ \t_\-\.()\[\]'"\/]/, '' )
151
309
 
152
- logger.debug " before remove special chars >#{key}<"
153
-
154
310
  ## remove special chars (e.g. %°&)
155
311
  key = key.gsub( /[%&°]/, '' )
156
312
 
157
- logger.debug " after remove special chars - step 1 >#{key}<"
158
-
159
- ## remove &&&& - try again / why is it not working?
160
- key = key.gsub( /&/, '' )
161
-
162
- logger.debug " after remove special chars - step 2 >#{key}<"
163
-
164
-
165
313
  ## turn accented char into ascii look alike if possible
166
314
  ##
167
315
  ## todo: add some more
@@ -1,6 +1,6 @@
1
1
 
2
2
  module TextUtils
3
3
 
4
- VERSION = '0.5.7'
4
+ VERSION = '0.5.8'
5
5
 
6
6
  end # module TextUtils
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textutils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.7
4
+ version: 0.5.8
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-05-04 00:00:00.000000000 Z
12
+ date: 2013-05-05 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: logutils
16
- requirement: &77681200 !ruby/object:Gem::Requirement
16
+ requirement: &73298570 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0.5'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *77681200
24
+ version_requirements: *73298570
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rdoc
27
- requirement: &77680980 !ruby/object:Gem::Requirement
27
+ requirement: &73298350 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '3.10'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *77680980
35
+ version_requirements: *73298350
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: hoe
38
- requirement: &77680760 !ruby/object:Gem::Requirement
38
+ requirement: &73298130 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: '3.3'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *77680760
46
+ version_requirements: *73298130
47
47
  description: textutils - Text Filters, Helpers, Readers and More
48
48
  email: webslideshow@googlegroups.com
49
49
  executables: []