textutils 0.5.7 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,8 +12,166 @@ class ValuesReader
12
12
  @data = File.read_utf8( @path )
13
13
  end
14
14
 
15
- def each_line
16
-
15
+
16
+ ##########
17
+ # todo/fix:
18
+ # create a new ValuesReaderEx or ValuesReaderV2 or similar
19
+ # - handle tags (last entry, split up into entries)
20
+ # - handle key:value pairs (split up and return in ordered hash)
21
+ # and so on - lets us reuse code for tags and more
22
+
23
+
24
# Iterates over the records in @data and yields each one to the given block
# as a pair: (attribs, more_cols).
#
# attribs   - Hash with :title, optional :synonyms (remaining titles joined
#             with '|'), :key, plus everything merged in from @more_values.
# more_cols - Array of the remaining column values of the record header
#             line, with any following "key: value" lines of a multi-line
#             record put in front (most recent line first).
#
# Line handling, in order:
# - lines starting with #, -- or % (after optional whitespace) are comments
#   and skipped; blank lines are skipped too
# - a trailing "  # comment" is cut off, then the line is stripped
# - a line starting with a lowercase key directly followed by a colon
#   (e.g. brands: or city:wien) is added to the current record as one
#   extra value
# - any other line starts a new record; columns are split on commas
#   (use \, for a literal comma inside a value)
#
# A record is yielded only once the next record starts (or the data ends),
# because its multi-line values follow its header line.
def each_line   # support multi line records

  inside_line = false   # true as soon as the first record header line was parsed
  attribs     = {}
  more_cols   = []

  @data.each_line do |line|

    ## allow alternative comment lines
    ##   e.g. -- comment or
    ##        % comment
    ##  why?  # might get used by markdown for marking headers, for example

    ## NB: for now alternative comment lines not allowed as end of line style e.g
    ##   some data, more data   -- comment here

    if line =~ /^\s*#/ || line =~ /^\s*--/ || line =~ /^\s*%/
      # skip comments and do NOT copy to result (keep comments secret!)
      logger.debug 'skipping comment line'
      next
    end

    if line =~ /^\s*$/
      # skip blank lines
      logger.debug 'skipping blank line'
      next
    end

    # pass 1) remove possible trailing eol comment
    ##   e.g  -> nyc, New York   # Sample EOL Comment Here (with or without commas,,,,)
    ##   becomes -> nyc, New York
    line = line.sub( /\s+#.+$/, '' )

    # pass 2) remove leading and trailing whitespace
    line = line.strip

    ### check for multiline record
    ##   must start with key and colon  e.g.  brands:
    ##   NB: the * quantifier is required; without it only keys that are
    ##       exactly three characters long are recognized
    if line =~ /^[a-z][a-z0-9.]*[a-z0-9]:/
      # NB: every additional line is one value e.g. city:wien, etc.
      #  allows you to use any chars
      logger.debug " multi-line record - add key-value >#{line}<"

      # add value upfront to array (first value); lets us keep (optional) tags as last entry
      more_cols.unshift( line.dup )
      next
    end

    # NB: a new record header ends the previous (possibly multi-line) record;
    #  yield it now - attribs and more_cols get rebuilt from scratch below
    yield( attribs, more_cols ) if inside_line
    inside_line = true

    ### guard escaped commas (e.g. \,)
    line = line.gsub( '\,', '@commma@' )

    ## use generic separator (allow us to configure separator)
    line = line.gsub( ',', '@sep@')

    ## restore escaped commas (before split)
    line = line.gsub( '@commma@', ',' )

    logger.debug "line: >>#{line}<<"

    # split into columns and remove leading and trailing whitespace for values
    values = line.split( '@sep@' ).map { |value| value.strip }

    ##### todo remove support of comment column? (NB: must NOT include commas)
    # remove comment columns (values starting with #)
    values = values.reject do |value|
      comment_col = value =~ /^#/
      logger.debug " removing column with value >>#{value}<<" if comment_col
      comment_col
    end

    logger.debug " values: >>#{values.join('<< >>')}<<"

    ### todo/fix: allow check - do NOT allow mixed use of with key and w/o key
    ##  either use keys or do NOT use keys; do NOT mix in a single fixture file

    ### support autogenerate key from first title value

    # if it looks like a key (only a-z lower case allowed); assume it's a key
    #   - also allow . in keys e.g. world.quali.america, at.cup, etc.
    #   - also allow 0-9 in keys e.g. at.2, at.3.1, etc.
    #
    # NB: a lone value is always the title (never a key w/o title); otherwise
    #  title_col would be nil and the split below would raise

    # fix/todo: add support for leading underscore _
    #   or allow keys starting w/ digits?
    if values.size > 1 && values[0] =~ /^([a-z][a-z0-9.]*[a-z0-9]|[a-z])$/   # NB: key must start w/ a-z letter (NB: minimum one letter possible)
      key_col   = values[0]
      title_col = values[1]
      more_cols = values[2..-1]
    else
      key_col   = '<auto>'
      title_col = values[0]
      more_cols = values[1..-1]
    end

    ## title (split of optional synonyms)
    #   e.g. FC Bayern Muenchen|Bayern Muenchen|Bayern
    titles = title_col.split('|')

    attribs = {}
    attribs[ :title ] = titles[0]

    ## add optional synonyms if present
    attribs[ :synonyms ] = titles[1..-1].join('|') if titles.size > 1

    if key_col == '<auto>'
      ## autogenerate key from first title
      key_col = title_to_key( titles[0] )
      logger.debug " autogen key >#{key_col}< from title >#{titles[0]}<, textutils version #{TextUtils::VERSION}"
    end

    attribs[ :key ] = key_col

    attribs = attribs.merge( @more_values )  # e.g. merge country_id and other defaults if present

  end # each lines

  # do NOT forget to yield last line (if present/processed)
  yield( attribs, more_cols ) if inside_line

end # method each_line
170
+
171
+
172
+
173
+ def each_line_old_single_line_records_only
174
+
17
175
  @data.each_line do |line|
18
176
 
19
177
  ## allow alternative comment lines
@@ -149,19 +307,9 @@ class ValuesReader
149
307
  ## remove all whitespace and punctuation
150
308
  key = key.gsub( /[ \t_\-\.()\[\]'"\/]/, '' )
151
309
 
152
- logger.debug " before remove special chars >#{key}<"
153
-
154
310
  ## remove special chars (e.g. %°&)
155
311
  key = key.gsub( /[%&°]/, '' )
156
312
 
157
- logger.debug " after remove special chars - step 1 >#{key}<"
158
-
159
- ## remove &&&& - try again / why is it not working?
160
- key = key.gsub( /&/, '' )
161
-
162
- logger.debug " after remove special chars - step 2 >#{key}<"
163
-
164
-
165
313
  ## turn accented char into ascii look alike if possible
166
314
  ##
167
315
  ## todo: add some more
@@ -1,6 +1,6 @@
1
1
 
2
2
  module TextUtils
3
3
 
4
- VERSION = '0.5.7'
4
+ VERSION = '0.5.8'
5
5
 
6
6
  end # module TextUtils
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textutils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.7
4
+ version: 0.5.8
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-05-04 00:00:00.000000000 Z
12
+ date: 2013-05-05 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: logutils
16
- requirement: &77681200 !ruby/object:Gem::Requirement
16
+ requirement: &73298570 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0.5'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *77681200
24
+ version_requirements: *73298570
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rdoc
27
- requirement: &77680980 !ruby/object:Gem::Requirement
27
+ requirement: &73298350 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '3.10'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *77680980
35
+ version_requirements: *73298350
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: hoe
38
- requirement: &77680760 !ruby/object:Gem::Requirement
38
+ requirement: &73298130 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: '3.3'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *77680760
46
+ version_requirements: *73298130
47
47
  description: textutils - Text Filters, Helpers, Readers and More
48
48
  email: webslideshow@googlegroups.com
49
49
  executables: []