remote_table 0.2.32 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +5 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +65 -0
- data/LICENSE +1 -1
- data/README.rdoc +21 -7
- data/Rakefile +12 -61
- data/lib/remote_table/cleaner.rb +19 -0
- data/lib/remote_table/executor.rb +29 -0
- data/lib/remote_table/format/delimited.rb +62 -0
- data/lib/remote_table/format/excel.rb +10 -0
- data/lib/remote_table/format/excelx.rb +10 -0
- data/lib/remote_table/format/fixed_width.rb +47 -0
- data/lib/remote_table/format/html.rb +43 -0
- data/lib/remote_table/format/mixins/rooable.rb +47 -0
- data/lib/remote_table/format/mixins/textual.rb +34 -0
- data/lib/remote_table/format/open_office.rb +10 -0
- data/lib/remote_table/format.rb +35 -0
- data/lib/remote_table/hasher.rb +25 -0
- data/lib/remote_table/local_file.rb +92 -0
- data/lib/remote_table/properties.rb +209 -0
- data/lib/remote_table/transformer.rb +17 -0
- data/lib/remote_table/version.rb +3 -0
- data/lib/remote_table.rb +91 -99
- data/remote_table.gemspec +32 -77
- data/test/{test_helper.rb → helper.rb} +9 -2
- data/test/test_big.rb +61 -0
- data/test/test_errata.rb +46 -0
- data/test/test_old_syntax.rb +229 -0
- data/test/test_old_transform.rb +49 -0
- data/test/test_remote_table.rb +13 -0
- metadata +176 -53
- data/VERSION +0 -1
- data/lib/remote_table/file/csv.rb +0 -49
- data/lib/remote_table/file/fixed_width.rb +0 -19
- data/lib/remote_table/file/html.rb +0 -37
- data/lib/remote_table/file/ods.rb +0 -11
- data/lib/remote_table/file/roo_spreadsheet.rb +0 -44
- data/lib/remote_table/file/xls.rb +0 -11
- data/lib/remote_table/file/xlsx.rb +0 -11
- data/lib/remote_table/file.rb +0 -100
- data/lib/remote_table/package.rb +0 -89
- data/lib/remote_table/request.rb +0 -44
- data/lib/remote_table/transform.rb +0 -58
- data/test/remote_table_test.rb +0 -386
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: remote_table
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
|
+
- 1
|
7
8
|
- 0
|
8
|
-
-
|
9
|
-
|
10
|
-
version: 0.2.32
|
9
|
+
- 0
|
10
|
+
version: 1.0.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Seamus Abshere
|
@@ -16,39 +16,38 @@ autorequire:
|
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date:
|
19
|
+
date: 2011-01-24 00:00:00 -06:00
|
20
20
|
default_executable:
|
21
21
|
dependencies:
|
22
22
|
- !ruby/object:Gem::Dependency
|
23
|
-
name:
|
23
|
+
name: activesupport
|
24
24
|
prerelease: false
|
25
25
|
requirement: &id001 !ruby/object:Gem::Requirement
|
26
26
|
none: false
|
27
27
|
requirements:
|
28
|
-
- - "
|
28
|
+
- - ">="
|
29
29
|
- !ruby/object:Gem::Version
|
30
|
-
hash:
|
30
|
+
hash: 11
|
31
31
|
segments:
|
32
|
-
-
|
32
|
+
- 2
|
33
33
|
- 3
|
34
|
-
-
|
35
|
-
version:
|
34
|
+
- 4
|
35
|
+
version: 2.3.4
|
36
36
|
type: :runtime
|
37
37
|
version_requirements: *id001
|
38
38
|
- !ruby/object:Gem::Dependency
|
39
|
-
name:
|
39
|
+
name: roo
|
40
40
|
prerelease: false
|
41
41
|
requirement: &id002 !ruby/object:Gem::Requirement
|
42
42
|
none: false
|
43
43
|
requirements:
|
44
|
-
- -
|
44
|
+
- - ~>
|
45
45
|
- !ruby/object:Gem::Version
|
46
|
-
hash:
|
46
|
+
hash: 29
|
47
47
|
segments:
|
48
|
-
-
|
49
|
-
-
|
50
|
-
|
51
|
-
version: 2.3.4
|
48
|
+
- 1
|
49
|
+
- 9
|
50
|
+
version: "1.9"
|
52
51
|
type: :runtime
|
53
52
|
version_requirements: *id002
|
54
53
|
- !ruby/object:Gem::Dependency
|
@@ -68,9 +67,51 @@ dependencies:
|
|
68
67
|
type: :runtime
|
69
68
|
version_requirements: *id003
|
70
69
|
- !ruby/object:Gem::Dependency
|
71
|
-
name:
|
70
|
+
name: i18n
|
72
71
|
prerelease: false
|
73
72
|
requirement: &id004 !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ">="
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
hash: 3
|
78
|
+
segments:
|
79
|
+
- 0
|
80
|
+
version: "0"
|
81
|
+
type: :runtime
|
82
|
+
version_requirements: *id004
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: builder
|
85
|
+
prerelease: false
|
86
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
87
|
+
none: false
|
88
|
+
requirements:
|
89
|
+
- - ">="
|
90
|
+
- !ruby/object:Gem::Version
|
91
|
+
hash: 3
|
92
|
+
segments:
|
93
|
+
- 0
|
94
|
+
version: "0"
|
95
|
+
type: :runtime
|
96
|
+
version_requirements: *id005
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: zip
|
99
|
+
prerelease: false
|
100
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
101
|
+
none: false
|
102
|
+
requirements:
|
103
|
+
- - ">="
|
104
|
+
- !ruby/object:Gem::Version
|
105
|
+
hash: 3
|
106
|
+
segments:
|
107
|
+
- 0
|
108
|
+
version: "0"
|
109
|
+
type: :runtime
|
110
|
+
version_requirements: *id006
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: nokogiri
|
113
|
+
prerelease: false
|
114
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
74
115
|
none: false
|
75
116
|
requirements:
|
76
117
|
- - ">="
|
@@ -82,11 +123,39 @@ dependencies:
|
|
82
123
|
- 1
|
83
124
|
version: 1.4.1
|
84
125
|
type: :runtime
|
85
|
-
version_requirements: *
|
126
|
+
version_requirements: *id007
|
127
|
+
- !ruby/object:Gem::Dependency
|
128
|
+
name: spreadsheet
|
129
|
+
prerelease: false
|
130
|
+
requirement: &id008 !ruby/object:Gem::Requirement
|
131
|
+
none: false
|
132
|
+
requirements:
|
133
|
+
- - ">="
|
134
|
+
- !ruby/object:Gem::Version
|
135
|
+
hash: 3
|
136
|
+
segments:
|
137
|
+
- 0
|
138
|
+
version: "0"
|
139
|
+
type: :runtime
|
140
|
+
version_requirements: *id008
|
141
|
+
- !ruby/object:Gem::Dependency
|
142
|
+
name: google-spreadsheet-ruby
|
143
|
+
prerelease: false
|
144
|
+
requirement: &id009 !ruby/object:Gem::Requirement
|
145
|
+
none: false
|
146
|
+
requirements:
|
147
|
+
- - ">="
|
148
|
+
- !ruby/object:Gem::Version
|
149
|
+
hash: 3
|
150
|
+
segments:
|
151
|
+
- 0
|
152
|
+
version: "0"
|
153
|
+
type: :runtime
|
154
|
+
version_requirements: *id009
|
86
155
|
- !ruby/object:Gem::Dependency
|
87
156
|
name: escape
|
88
157
|
prerelease: false
|
89
|
-
requirement: &
|
158
|
+
requirement: &id010 !ruby/object:Gem::Requirement
|
90
159
|
none: false
|
91
160
|
requirements:
|
92
161
|
- - ">="
|
@@ -98,11 +167,11 @@ dependencies:
|
|
98
167
|
- 4
|
99
168
|
version: 0.0.4
|
100
169
|
type: :runtime
|
101
|
-
version_requirements: *
|
170
|
+
version_requirements: *id010
|
102
171
|
- !ruby/object:Gem::Dependency
|
103
172
|
name: errata
|
104
173
|
prerelease: false
|
105
|
-
requirement: &
|
174
|
+
requirement: &id011 !ruby/object:Gem::Requirement
|
106
175
|
none: false
|
107
176
|
requirements:
|
108
177
|
- - ">="
|
@@ -114,48 +183,98 @@ dependencies:
|
|
114
183
|
- 0
|
115
184
|
version: 0.2.0
|
116
185
|
type: :development
|
117
|
-
version_requirements: *
|
118
|
-
|
119
|
-
|
186
|
+
version_requirements: *id011
|
187
|
+
- !ruby/object:Gem::Dependency
|
188
|
+
name: test-unit
|
189
|
+
prerelease: false
|
190
|
+
requirement: &id012 !ruby/object:Gem::Requirement
|
191
|
+
none: false
|
192
|
+
requirements:
|
193
|
+
- - ">="
|
194
|
+
- !ruby/object:Gem::Version
|
195
|
+
hash: 3
|
196
|
+
segments:
|
197
|
+
- 0
|
198
|
+
version: "0"
|
199
|
+
type: :development
|
200
|
+
version_requirements: *id012
|
201
|
+
- !ruby/object:Gem::Dependency
|
202
|
+
name: shoulda
|
203
|
+
prerelease: false
|
204
|
+
requirement: &id013 !ruby/object:Gem::Requirement
|
205
|
+
none: false
|
206
|
+
requirements:
|
207
|
+
- - ">="
|
208
|
+
- !ruby/object:Gem::Version
|
209
|
+
hash: 3
|
210
|
+
segments:
|
211
|
+
- 0
|
212
|
+
version: "0"
|
213
|
+
type: :development
|
214
|
+
version_requirements: *id013
|
215
|
+
- !ruby/object:Gem::Dependency
|
216
|
+
name: ruby-debug
|
217
|
+
prerelease: false
|
218
|
+
requirement: &id014 !ruby/object:Gem::Requirement
|
219
|
+
none: false
|
220
|
+
requirements:
|
221
|
+
- - ">="
|
222
|
+
- !ruby/object:Gem::Version
|
223
|
+
hash: 3
|
224
|
+
segments:
|
225
|
+
- 0
|
226
|
+
version: "0"
|
227
|
+
type: :development
|
228
|
+
version_requirements: *id014
|
229
|
+
description: Gives you a standard way to parse various formats and treat them as an array of hashes.
|
230
|
+
email:
|
231
|
+
- seamus@abshere.net
|
120
232
|
executables: []
|
121
233
|
|
122
234
|
extensions: []
|
123
235
|
|
124
|
-
extra_rdoc_files:
|
125
|
-
|
126
|
-
- README.rdoc
|
236
|
+
extra_rdoc_files: []
|
237
|
+
|
127
238
|
files:
|
128
239
|
- .document
|
129
240
|
- .gitignore
|
130
241
|
- CHANGELOG
|
242
|
+
- Gemfile
|
243
|
+
- Gemfile.lock
|
131
244
|
- LICENSE
|
132
245
|
- README.rdoc
|
133
246
|
- Rakefile
|
134
|
-
- VERSION
|
135
247
|
- lib/remote_table.rb
|
136
|
-
- lib/remote_table/
|
137
|
-
- lib/remote_table/
|
138
|
-
- lib/remote_table/
|
139
|
-
- lib/remote_table/
|
140
|
-
- lib/remote_table/
|
141
|
-
- lib/remote_table/
|
142
|
-
- lib/remote_table/
|
143
|
-
- lib/remote_table/
|
144
|
-
- lib/remote_table/
|
145
|
-
- lib/remote_table/
|
146
|
-
- lib/remote_table/
|
248
|
+
- lib/remote_table/cleaner.rb
|
249
|
+
- lib/remote_table/executor.rb
|
250
|
+
- lib/remote_table/format.rb
|
251
|
+
- lib/remote_table/format/delimited.rb
|
252
|
+
- lib/remote_table/format/excel.rb
|
253
|
+
- lib/remote_table/format/excelx.rb
|
254
|
+
- lib/remote_table/format/fixed_width.rb
|
255
|
+
- lib/remote_table/format/html.rb
|
256
|
+
- lib/remote_table/format/mixins/rooable.rb
|
257
|
+
- lib/remote_table/format/mixins/textual.rb
|
258
|
+
- lib/remote_table/format/open_office.rb
|
259
|
+
- lib/remote_table/hasher.rb
|
260
|
+
- lib/remote_table/local_file.rb
|
261
|
+
- lib/remote_table/properties.rb
|
262
|
+
- lib/remote_table/transformer.rb
|
263
|
+
- lib/remote_table/version.rb
|
147
264
|
- remote_table.gemspec
|
148
|
-
- test/
|
149
|
-
- test/
|
265
|
+
- test/helper.rb
|
266
|
+
- test/test_big.rb
|
267
|
+
- test/test_errata.rb
|
268
|
+
- test/test_old_syntax.rb
|
269
|
+
- test/test_old_transform.rb
|
270
|
+
- test/test_remote_table.rb
|
150
271
|
has_rdoc: true
|
151
|
-
homepage:
|
272
|
+
homepage: https://github.com/seamusabshere/remote_table
|
152
273
|
licenses: []
|
153
274
|
|
154
275
|
post_install_message:
|
155
|
-
rdoc_options:
|
156
|
-
|
157
|
-
- --line-numbers
|
158
|
-
- --inline-source
|
276
|
+
rdoc_options: []
|
277
|
+
|
159
278
|
require_paths:
|
160
279
|
- lib
|
161
280
|
required_ruby_version: !ruby/object:Gem::Requirement
|
@@ -176,13 +295,17 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
176
295
|
segments:
|
177
296
|
- 0
|
178
297
|
version: "0"
|
179
|
-
requirements:
|
180
|
-
|
298
|
+
requirements: []
|
299
|
+
|
181
300
|
rubyforge_project: remotetable
|
182
301
|
rubygems_version: 1.3.7
|
183
302
|
signing_key:
|
184
303
|
specification_version: 3
|
185
|
-
summary:
|
304
|
+
summary: Open local or remote XLSX, XLS, ODS, CSV and fixed-width files.
|
186
305
|
test_files:
|
187
|
-
- test/
|
188
|
-
- test/
|
306
|
+
- test/helper.rb
|
307
|
+
- test/test_big.rb
|
308
|
+
- test/test_errata.rb
|
309
|
+
- test/test_old_syntax.rb
|
310
|
+
- test/test_old_transform.rb
|
311
|
+
- test/test_remote_table.rb
|
data/VERSION
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
0.2.32
|
@@ -1,49 +0,0 @@
|
|
1
|
-
class RemoteTable
|
2
|
-
module Csv
|
3
|
-
def each_row(&block)
|
4
|
-
backup_file!
|
5
|
-
convert_file_to_utf8!
|
6
|
-
remove_useless_characters!
|
7
|
-
skip_rows!
|
8
|
-
FasterCSV.foreach(path, fastercsv_options) do |row|
|
9
|
-
ordered_hash = ActiveSupport::OrderedHash.new
|
10
|
-
filled_values = 0
|
11
|
-
case row
|
12
|
-
when FasterCSV::Row
|
13
|
-
row.each do |header, value|
|
14
|
-
next if header.blank?
|
15
|
-
value = '' if value.nil?
|
16
|
-
ordered_hash[header] = value
|
17
|
-
filled_values += 1 if value.present?
|
18
|
-
end
|
19
|
-
when Array
|
20
|
-
index = 0
|
21
|
-
row.each do |value|
|
22
|
-
value = '' if value.nil?
|
23
|
-
ordered_hash[index] = value
|
24
|
-
filled_values += 1 if value.present?
|
25
|
-
index += 1
|
26
|
-
end
|
27
|
-
else
|
28
|
-
raise "Unexpected #{row.inspect}"
|
29
|
-
end
|
30
|
-
yield ordered_hash if keep_blank_rows or filled_values.nonzero?
|
31
|
-
end
|
32
|
-
ensure
|
33
|
-
restore_file!
|
34
|
-
end
|
35
|
-
|
36
|
-
private
|
37
|
-
|
38
|
-
def fastercsv_options
|
39
|
-
fastercsv_options = { :skip_blanks => !keep_blank_rows }
|
40
|
-
if headers == false
|
41
|
-
fastercsv_options.merge!(:headers => nil)
|
42
|
-
else
|
43
|
-
fastercsv_options.merge!(:headers => :first_row)
|
44
|
-
end
|
45
|
-
fastercsv_options.merge!(:col_sep => delimiter) if delimiter
|
46
|
-
fastercsv_options
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
@@ -1,19 +0,0 @@
|
|
1
|
-
class RemoteTable
|
2
|
-
module FixedWidth
|
3
|
-
def each_row(&block)
|
4
|
-
backup_file!
|
5
|
-
convert_file_to_utf8!
|
6
|
-
remove_useless_characters!
|
7
|
-
crop_rows!
|
8
|
-
skip_rows!
|
9
|
-
cut_columns!
|
10
|
-
a = Slither.parse(path, schema_name)
|
11
|
-
a[:rows].each do |hash|
|
12
|
-
hash.reject! { |k, v| k.blank? }
|
13
|
-
yield hash if keep_blank_rows or hash.any? { |k, v| v.present? }
|
14
|
-
end
|
15
|
-
ensure
|
16
|
-
restore_file!
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end
|
@@ -1,37 +0,0 @@
|
|
1
|
-
class RemoteTable
|
2
|
-
module Html
|
3
|
-
def each_row(&block)
|
4
|
-
backup_file!
|
5
|
-
convert_file_to_utf8!
|
6
|
-
remove_useless_characters!
|
7
|
-
html_headers = (headers.is_a?(Array)) ? headers : nil
|
8
|
-
Nokogiri::HTML(unescaped_html_without_soft_hyphens, nil, 'UTF-8').xpath(row_xpath).each do |row|
|
9
|
-
values = row.xpath(column_xpath).map { |td| td.content.gsub(/\s+/, ' ').strip }
|
10
|
-
if html_headers.nil?
|
11
|
-
html_headers = values
|
12
|
-
next
|
13
|
-
end
|
14
|
-
hash = zip html_headers, values
|
15
|
-
yield hash if keep_blank_rows or hash.any? { |k, v| v.present? }
|
16
|
-
end
|
17
|
-
ensure
|
18
|
-
restore_file!
|
19
|
-
end
|
20
|
-
|
21
|
-
private
|
22
|
-
|
23
|
-
# http://snippets.dzone.com/posts/show/406
|
24
|
-
def zip(keys, values)
|
25
|
-
hash = Hash.new
|
26
|
-
keys.zip(values) { |k,v| hash[k]=v }
|
27
|
-
hash
|
28
|
-
end
|
29
|
-
|
30
|
-
# should we be doing this in ruby?
|
31
|
-
def unescaped_html_without_soft_hyphens
|
32
|
-
str = CGI.unescapeHTML IO.read(path)
|
33
|
-
str.gsub! /­/, ''
|
34
|
-
str
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
@@ -1,44 +0,0 @@
|
|
1
|
-
class RemoteTable
|
2
|
-
module RooSpreadsheet
|
3
|
-
def each_row(&block)
|
4
|
-
oo = roo_klass.new(path, nil, :ignore)
|
5
|
-
oo.default_sheet = sheet.is_a?(Numeric) ? oo.sheets[sheet] : sheet
|
6
|
-
column_references = Hash.new
|
7
|
-
if headers == false
|
8
|
-
# zero-based numeric keys
|
9
|
-
for col in (1..oo.last_column)
|
10
|
-
column_references[col] = col - 1
|
11
|
-
end
|
12
|
-
elsif headers.is_a? Array
|
13
|
-
# names
|
14
|
-
for col in (1..oo.last_column)
|
15
|
-
column_references[col] = headers[col - 1]
|
16
|
-
end
|
17
|
-
else
|
18
|
-
# read headers from the file itself
|
19
|
-
for col in (1..oo.last_column)
|
20
|
-
column_references[col] = oo.cell(header_row, col)
|
21
|
-
column_references[col] = oo.cell(header_row - 1, col) if column_references[col].blank? # look up
|
22
|
-
end
|
23
|
-
end
|
24
|
-
first_data_row.upto(oo.last_row) do |raw_row|
|
25
|
-
ordered_hash = ActiveSupport::OrderedHash.new
|
26
|
-
for col in (1..oo.last_column)
|
27
|
-
next if column_references[col].blank?
|
28
|
-
ordered_hash[column_references[col]] = oo.cell(raw_row, col).to_s.gsub(/<[^>]+>/, '').strip
|
29
|
-
end
|
30
|
-
yield ordered_hash if keep_blank_rows or ordered_hash.any? { |k, v| v.present? }
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
private
|
35
|
-
|
36
|
-
def header_row
|
37
|
-
1 + skip.to_i
|
38
|
-
end
|
39
|
-
|
40
|
-
def first_data_row
|
41
|
-
1 + header_row
|
42
|
-
end
|
43
|
-
end
|
44
|
-
end
|
data/lib/remote_table/file.rb
DELETED
@@ -1,100 +0,0 @@
|
|
1
|
-
class RemoteTable
|
2
|
-
class File
|
3
|
-
attr_accessor :filename, :format, :delimiter, :skip, :cut, :crop, :sheet, :headers, :schema, :schema_name, :trap
|
4
|
-
attr_accessor :encoding
|
5
|
-
attr_accessor :path
|
6
|
-
attr_accessor :keep_blank_rows
|
7
|
-
attr_accessor :row_xpath
|
8
|
-
attr_accessor :column_xpath
|
9
|
-
|
10
|
-
def initialize(bus)
|
11
|
-
@filename = bus[:filename]
|
12
|
-
@format = bus[:format] || format_from_filename
|
13
|
-
@delimiter = bus[:delimiter]
|
14
|
-
@sheet = bus[:sheet] || 0
|
15
|
-
@skip = bus[:skip] # rows
|
16
|
-
@keep_blank_rows = bus[:keep_blank_rows] || false
|
17
|
-
@crop = bus[:crop] # rows
|
18
|
-
@cut = bus[:cut] # columns
|
19
|
-
@headers = bus[:headers]
|
20
|
-
@schema = bus[:schema]
|
21
|
-
@schema_name = bus[:schema_name]
|
22
|
-
@trap = bus[:trap]
|
23
|
-
@encoding = bus[:encoding] || 'UTF-8'
|
24
|
-
@row_xpath = bus[:row_xpath]
|
25
|
-
@column_xpath = bus[:column_xpath]
|
26
|
-
extend "RemoteTable::#{format.to_s.camelcase}".constantize
|
27
|
-
end
|
28
|
-
|
29
|
-
def tabulate(path)
|
30
|
-
define_fixed_width_schema! if format == :fixed_width and schema.is_a?(Array) # TODO move to generic subclass callback
|
31
|
-
self.path = path
|
32
|
-
self
|
33
|
-
end
|
34
|
-
|
35
|
-
private
|
36
|
-
|
37
|
-
# doesn't support trap
|
38
|
-
def define_fixed_width_schema!
|
39
|
-
raise "can't define both schema_name and schema" if !schema_name.blank?
|
40
|
-
self.schema_name = "autogenerated_#{filename.gsub(/[^a-z0-9_]/i, '')}".to_sym
|
41
|
-
self.trap ||= lambda { |_| true }
|
42
|
-
Slither.define schema_name do |d|
|
43
|
-
d.rows do |row|
|
44
|
-
row.trap(&trap)
|
45
|
-
schema.each do |name, width, options|
|
46
|
-
if name == 'spacer'
|
47
|
-
row.spacer width
|
48
|
-
else
|
49
|
-
row.column name, width, options
|
50
|
-
end
|
51
|
-
end
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
def backup_file!
|
57
|
-
FileUtils.cp path, "#{path}.backup"
|
58
|
-
end
|
59
|
-
|
60
|
-
def skip_rows!
|
61
|
-
return unless skip
|
62
|
-
RemoteTable.bang path, "tail -n +#{skip + 1}"
|
63
|
-
end
|
64
|
-
|
65
|
-
USELESS_CHARACTERS = [
|
66
|
-
'\xef\xbb\xbf', # UTF-8 byte order mark
|
67
|
-
'\xc2\xad' # soft hyphen, often inserted by MS Office (html: ­)
|
68
|
-
]
|
69
|
-
def remove_useless_characters!
|
70
|
-
RemoteTable.bang path, "perl -pe 's/#{USELESS_CHARACTERS.join '//g; s/'}//g'"
|
71
|
-
end
|
72
|
-
|
73
|
-
def convert_file_to_utf8!
|
74
|
-
RemoteTable.bang path, "iconv -c -f #{Escape.shell_single_word encoding} -t UTF-8"
|
75
|
-
end
|
76
|
-
|
77
|
-
def restore_file!
|
78
|
-
FileUtils.mv "#{path}.backup", path if ::File.readable? "#{path}.backup"
|
79
|
-
end
|
80
|
-
|
81
|
-
def cut_columns!
|
82
|
-
return unless cut
|
83
|
-
RemoteTable.bang path, "cut -c #{Escape.shell_single_word cut.to_s}"
|
84
|
-
end
|
85
|
-
|
86
|
-
def crop_rows!
|
87
|
-
return unless crop
|
88
|
-
RemoteTable.bang path, "tail -n +#{Escape.shell_single_word crop.first.to_s} | head -n #{crop.last - crop.first + 1}"
|
89
|
-
end
|
90
|
-
|
91
|
-
def format_from_filename
|
92
|
-
extname = ::File.extname(filename).gsub('.', '')
|
93
|
-
return :csv if extname.blank?
|
94
|
-
format = [ :xls, :ods, :xlsx ].detect { |i| i == extname.to_sym }
|
95
|
-
format = :html if extname =~ /\Ahtm/
|
96
|
-
format = :csv if format.blank?
|
97
|
-
format
|
98
|
-
end
|
99
|
-
end
|
100
|
-
end
|