remote_table 0.2.32 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +5 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +65 -0
- data/LICENSE +1 -1
- data/README.rdoc +21 -7
- data/Rakefile +12 -61
- data/lib/remote_table/cleaner.rb +19 -0
- data/lib/remote_table/executor.rb +29 -0
- data/lib/remote_table/format/delimited.rb +62 -0
- data/lib/remote_table/format/excel.rb +10 -0
- data/lib/remote_table/format/excelx.rb +10 -0
- data/lib/remote_table/format/fixed_width.rb +47 -0
- data/lib/remote_table/format/html.rb +43 -0
- data/lib/remote_table/format/mixins/rooable.rb +47 -0
- data/lib/remote_table/format/mixins/textual.rb +34 -0
- data/lib/remote_table/format/open_office.rb +10 -0
- data/lib/remote_table/format.rb +35 -0
- data/lib/remote_table/hasher.rb +25 -0
- data/lib/remote_table/local_file.rb +92 -0
- data/lib/remote_table/properties.rb +209 -0
- data/lib/remote_table/transformer.rb +17 -0
- data/lib/remote_table/version.rb +3 -0
- data/lib/remote_table.rb +91 -99
- data/remote_table.gemspec +32 -77
- data/test/{test_helper.rb → helper.rb} +9 -2
- data/test/test_big.rb +61 -0
- data/test/test_errata.rb +46 -0
- data/test/test_old_syntax.rb +229 -0
- data/test/test_old_transform.rb +49 -0
- data/test/test_remote_table.rb +13 -0
- metadata +176 -53
- data/VERSION +0 -1
- data/lib/remote_table/file/csv.rb +0 -49
- data/lib/remote_table/file/fixed_width.rb +0 -19
- data/lib/remote_table/file/html.rb +0 -37
- data/lib/remote_table/file/ods.rb +0 -11
- data/lib/remote_table/file/roo_spreadsheet.rb +0 -44
- data/lib/remote_table/file/xls.rb +0 -11
- data/lib/remote_table/file/xlsx.rb +0 -11
- data/lib/remote_table/file.rb +0 -100
- data/lib/remote_table/package.rb +0 -89
- data/lib/remote_table/request.rb +0 -44
- data/lib/remote_table/transform.rb +0 -58
- data/test/remote_table_test.rb +0 -386
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: remote_table
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
|
+
- 1
|
7
8
|
- 0
|
8
|
-
-
|
9
|
-
|
10
|
-
version: 0.2.32
|
9
|
+
- 0
|
10
|
+
version: 1.0.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Seamus Abshere
|
@@ -16,39 +16,38 @@ autorequire:
|
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date:
|
19
|
+
date: 2011-01-24 00:00:00 -06:00
|
20
20
|
default_executable:
|
21
21
|
dependencies:
|
22
22
|
- !ruby/object:Gem::Dependency
|
23
|
-
name:
|
23
|
+
name: activesupport
|
24
24
|
prerelease: false
|
25
25
|
requirement: &id001 !ruby/object:Gem::Requirement
|
26
26
|
none: false
|
27
27
|
requirements:
|
28
|
-
- - "
|
28
|
+
- - ">="
|
29
29
|
- !ruby/object:Gem::Version
|
30
|
-
hash:
|
30
|
+
hash: 11
|
31
31
|
segments:
|
32
|
-
-
|
32
|
+
- 2
|
33
33
|
- 3
|
34
|
-
-
|
35
|
-
version:
|
34
|
+
- 4
|
35
|
+
version: 2.3.4
|
36
36
|
type: :runtime
|
37
37
|
version_requirements: *id001
|
38
38
|
- !ruby/object:Gem::Dependency
|
39
|
-
name:
|
39
|
+
name: roo
|
40
40
|
prerelease: false
|
41
41
|
requirement: &id002 !ruby/object:Gem::Requirement
|
42
42
|
none: false
|
43
43
|
requirements:
|
44
|
-
- -
|
44
|
+
- - ~>
|
45
45
|
- !ruby/object:Gem::Version
|
46
|
-
hash:
|
46
|
+
hash: 29
|
47
47
|
segments:
|
48
|
-
-
|
49
|
-
-
|
50
|
-
|
51
|
-
version: 2.3.4
|
48
|
+
- 1
|
49
|
+
- 9
|
50
|
+
version: "1.9"
|
52
51
|
type: :runtime
|
53
52
|
version_requirements: *id002
|
54
53
|
- !ruby/object:Gem::Dependency
|
@@ -68,9 +67,51 @@ dependencies:
|
|
68
67
|
type: :runtime
|
69
68
|
version_requirements: *id003
|
70
69
|
- !ruby/object:Gem::Dependency
|
71
|
-
name:
|
70
|
+
name: i18n
|
72
71
|
prerelease: false
|
73
72
|
requirement: &id004 !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ">="
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
hash: 3
|
78
|
+
segments:
|
79
|
+
- 0
|
80
|
+
version: "0"
|
81
|
+
type: :runtime
|
82
|
+
version_requirements: *id004
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: builder
|
85
|
+
prerelease: false
|
86
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
87
|
+
none: false
|
88
|
+
requirements:
|
89
|
+
- - ">="
|
90
|
+
- !ruby/object:Gem::Version
|
91
|
+
hash: 3
|
92
|
+
segments:
|
93
|
+
- 0
|
94
|
+
version: "0"
|
95
|
+
type: :runtime
|
96
|
+
version_requirements: *id005
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: zip
|
99
|
+
prerelease: false
|
100
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
101
|
+
none: false
|
102
|
+
requirements:
|
103
|
+
- - ">="
|
104
|
+
- !ruby/object:Gem::Version
|
105
|
+
hash: 3
|
106
|
+
segments:
|
107
|
+
- 0
|
108
|
+
version: "0"
|
109
|
+
type: :runtime
|
110
|
+
version_requirements: *id006
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: nokogiri
|
113
|
+
prerelease: false
|
114
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
74
115
|
none: false
|
75
116
|
requirements:
|
76
117
|
- - ">="
|
@@ -82,11 +123,39 @@ dependencies:
|
|
82
123
|
- 1
|
83
124
|
version: 1.4.1
|
84
125
|
type: :runtime
|
85
|
-
version_requirements: *
|
126
|
+
version_requirements: *id007
|
127
|
+
- !ruby/object:Gem::Dependency
|
128
|
+
name: spreadsheet
|
129
|
+
prerelease: false
|
130
|
+
requirement: &id008 !ruby/object:Gem::Requirement
|
131
|
+
none: false
|
132
|
+
requirements:
|
133
|
+
- - ">="
|
134
|
+
- !ruby/object:Gem::Version
|
135
|
+
hash: 3
|
136
|
+
segments:
|
137
|
+
- 0
|
138
|
+
version: "0"
|
139
|
+
type: :runtime
|
140
|
+
version_requirements: *id008
|
141
|
+
- !ruby/object:Gem::Dependency
|
142
|
+
name: google-spreadsheet-ruby
|
143
|
+
prerelease: false
|
144
|
+
requirement: &id009 !ruby/object:Gem::Requirement
|
145
|
+
none: false
|
146
|
+
requirements:
|
147
|
+
- - ">="
|
148
|
+
- !ruby/object:Gem::Version
|
149
|
+
hash: 3
|
150
|
+
segments:
|
151
|
+
- 0
|
152
|
+
version: "0"
|
153
|
+
type: :runtime
|
154
|
+
version_requirements: *id009
|
86
155
|
- !ruby/object:Gem::Dependency
|
87
156
|
name: escape
|
88
157
|
prerelease: false
|
89
|
-
requirement: &
|
158
|
+
requirement: &id010 !ruby/object:Gem::Requirement
|
90
159
|
none: false
|
91
160
|
requirements:
|
92
161
|
- - ">="
|
@@ -98,11 +167,11 @@ dependencies:
|
|
98
167
|
- 4
|
99
168
|
version: 0.0.4
|
100
169
|
type: :runtime
|
101
|
-
version_requirements: *
|
170
|
+
version_requirements: *id010
|
102
171
|
- !ruby/object:Gem::Dependency
|
103
172
|
name: errata
|
104
173
|
prerelease: false
|
105
|
-
requirement: &
|
174
|
+
requirement: &id011 !ruby/object:Gem::Requirement
|
106
175
|
none: false
|
107
176
|
requirements:
|
108
177
|
- - ">="
|
@@ -114,48 +183,98 @@ dependencies:
|
|
114
183
|
- 0
|
115
184
|
version: 0.2.0
|
116
185
|
type: :development
|
117
|
-
version_requirements: *
|
118
|
-
|
119
|
-
|
186
|
+
version_requirements: *id011
|
187
|
+
- !ruby/object:Gem::Dependency
|
188
|
+
name: test-unit
|
189
|
+
prerelease: false
|
190
|
+
requirement: &id012 !ruby/object:Gem::Requirement
|
191
|
+
none: false
|
192
|
+
requirements:
|
193
|
+
- - ">="
|
194
|
+
- !ruby/object:Gem::Version
|
195
|
+
hash: 3
|
196
|
+
segments:
|
197
|
+
- 0
|
198
|
+
version: "0"
|
199
|
+
type: :development
|
200
|
+
version_requirements: *id012
|
201
|
+
- !ruby/object:Gem::Dependency
|
202
|
+
name: shoulda
|
203
|
+
prerelease: false
|
204
|
+
requirement: &id013 !ruby/object:Gem::Requirement
|
205
|
+
none: false
|
206
|
+
requirements:
|
207
|
+
- - ">="
|
208
|
+
- !ruby/object:Gem::Version
|
209
|
+
hash: 3
|
210
|
+
segments:
|
211
|
+
- 0
|
212
|
+
version: "0"
|
213
|
+
type: :development
|
214
|
+
version_requirements: *id013
|
215
|
+
- !ruby/object:Gem::Dependency
|
216
|
+
name: ruby-debug
|
217
|
+
prerelease: false
|
218
|
+
requirement: &id014 !ruby/object:Gem::Requirement
|
219
|
+
none: false
|
220
|
+
requirements:
|
221
|
+
- - ">="
|
222
|
+
- !ruby/object:Gem::Version
|
223
|
+
hash: 3
|
224
|
+
segments:
|
225
|
+
- 0
|
226
|
+
version: "0"
|
227
|
+
type: :development
|
228
|
+
version_requirements: *id014
|
229
|
+
description: Gives you a standard way to parse various formats and treat them as an array of hashes.
|
230
|
+
email:
|
231
|
+
- seamus@abshere.net
|
120
232
|
executables: []
|
121
233
|
|
122
234
|
extensions: []
|
123
235
|
|
124
|
-
extra_rdoc_files:
|
125
|
-
|
126
|
-
- README.rdoc
|
236
|
+
extra_rdoc_files: []
|
237
|
+
|
127
238
|
files:
|
128
239
|
- .document
|
129
240
|
- .gitignore
|
130
241
|
- CHANGELOG
|
242
|
+
- Gemfile
|
243
|
+
- Gemfile.lock
|
131
244
|
- LICENSE
|
132
245
|
- README.rdoc
|
133
246
|
- Rakefile
|
134
|
-
- VERSION
|
135
247
|
- lib/remote_table.rb
|
136
|
-
- lib/remote_table/
|
137
|
-
- lib/remote_table/
|
138
|
-
- lib/remote_table/
|
139
|
-
- lib/remote_table/
|
140
|
-
- lib/remote_table/
|
141
|
-
- lib/remote_table/
|
142
|
-
- lib/remote_table/
|
143
|
-
- lib/remote_table/
|
144
|
-
- lib/remote_table/
|
145
|
-
- lib/remote_table/
|
146
|
-
- lib/remote_table/
|
248
|
+
- lib/remote_table/cleaner.rb
|
249
|
+
- lib/remote_table/executor.rb
|
250
|
+
- lib/remote_table/format.rb
|
251
|
+
- lib/remote_table/format/delimited.rb
|
252
|
+
- lib/remote_table/format/excel.rb
|
253
|
+
- lib/remote_table/format/excelx.rb
|
254
|
+
- lib/remote_table/format/fixed_width.rb
|
255
|
+
- lib/remote_table/format/html.rb
|
256
|
+
- lib/remote_table/format/mixins/rooable.rb
|
257
|
+
- lib/remote_table/format/mixins/textual.rb
|
258
|
+
- lib/remote_table/format/open_office.rb
|
259
|
+
- lib/remote_table/hasher.rb
|
260
|
+
- lib/remote_table/local_file.rb
|
261
|
+
- lib/remote_table/properties.rb
|
262
|
+
- lib/remote_table/transformer.rb
|
263
|
+
- lib/remote_table/version.rb
|
147
264
|
- remote_table.gemspec
|
148
|
-
- test/
|
149
|
-
- test/
|
265
|
+
- test/helper.rb
|
266
|
+
- test/test_big.rb
|
267
|
+
- test/test_errata.rb
|
268
|
+
- test/test_old_syntax.rb
|
269
|
+
- test/test_old_transform.rb
|
270
|
+
- test/test_remote_table.rb
|
150
271
|
has_rdoc: true
|
151
|
-
homepage:
|
272
|
+
homepage: https://github.com/seamusabshere/remote_table
|
152
273
|
licenses: []
|
153
274
|
|
154
275
|
post_install_message:
|
155
|
-
rdoc_options:
|
156
|
-
|
157
|
-
- --line-numbers
|
158
|
-
- --inline-source
|
276
|
+
rdoc_options: []
|
277
|
+
|
159
278
|
require_paths:
|
160
279
|
- lib
|
161
280
|
required_ruby_version: !ruby/object:Gem::Requirement
|
@@ -176,13 +295,17 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
176
295
|
segments:
|
177
296
|
- 0
|
178
297
|
version: "0"
|
179
|
-
requirements:
|
180
|
-
|
298
|
+
requirements: []
|
299
|
+
|
181
300
|
rubyforge_project: remotetable
|
182
301
|
rubygems_version: 1.3.7
|
183
302
|
signing_key:
|
184
303
|
specification_version: 3
|
185
|
-
summary:
|
304
|
+
summary: Open local or remote XLSX, XLS, ODS, CSV and fixed-width files.
|
186
305
|
test_files:
|
187
|
-
- test/
|
188
|
-
- test/
|
306
|
+
- test/helper.rb
|
307
|
+
- test/test_big.rb
|
308
|
+
- test/test_errata.rb
|
309
|
+
- test/test_old_syntax.rb
|
310
|
+
- test/test_old_transform.rb
|
311
|
+
- test/test_remote_table.rb
|
data/VERSION
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
0.2.32
|
@@ -1,49 +0,0 @@
|
|
1
|
-
class RemoteTable
|
2
|
-
module Csv
|
3
|
-
def each_row(&block)
|
4
|
-
backup_file!
|
5
|
-
convert_file_to_utf8!
|
6
|
-
remove_useless_characters!
|
7
|
-
skip_rows!
|
8
|
-
FasterCSV.foreach(path, fastercsv_options) do |row|
|
9
|
-
ordered_hash = ActiveSupport::OrderedHash.new
|
10
|
-
filled_values = 0
|
11
|
-
case row
|
12
|
-
when FasterCSV::Row
|
13
|
-
row.each do |header, value|
|
14
|
-
next if header.blank?
|
15
|
-
value = '' if value.nil?
|
16
|
-
ordered_hash[header] = value
|
17
|
-
filled_values += 1 if value.present?
|
18
|
-
end
|
19
|
-
when Array
|
20
|
-
index = 0
|
21
|
-
row.each do |value|
|
22
|
-
value = '' if value.nil?
|
23
|
-
ordered_hash[index] = value
|
24
|
-
filled_values += 1 if value.present?
|
25
|
-
index += 1
|
26
|
-
end
|
27
|
-
else
|
28
|
-
raise "Unexpected #{row.inspect}"
|
29
|
-
end
|
30
|
-
yield ordered_hash if keep_blank_rows or filled_values.nonzero?
|
31
|
-
end
|
32
|
-
ensure
|
33
|
-
restore_file!
|
34
|
-
end
|
35
|
-
|
36
|
-
private
|
37
|
-
|
38
|
-
def fastercsv_options
|
39
|
-
fastercsv_options = { :skip_blanks => !keep_blank_rows }
|
40
|
-
if headers == false
|
41
|
-
fastercsv_options.merge!(:headers => nil)
|
42
|
-
else
|
43
|
-
fastercsv_options.merge!(:headers => :first_row)
|
44
|
-
end
|
45
|
-
fastercsv_options.merge!(:col_sep => delimiter) if delimiter
|
46
|
-
fastercsv_options
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
@@ -1,19 +0,0 @@
|
|
1
|
-
class RemoteTable
|
2
|
-
module FixedWidth
|
3
|
-
def each_row(&block)
|
4
|
-
backup_file!
|
5
|
-
convert_file_to_utf8!
|
6
|
-
remove_useless_characters!
|
7
|
-
crop_rows!
|
8
|
-
skip_rows!
|
9
|
-
cut_columns!
|
10
|
-
a = Slither.parse(path, schema_name)
|
11
|
-
a[:rows].each do |hash|
|
12
|
-
hash.reject! { |k, v| k.blank? }
|
13
|
-
yield hash if keep_blank_rows or hash.any? { |k, v| v.present? }
|
14
|
-
end
|
15
|
-
ensure
|
16
|
-
restore_file!
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end
|
@@ -1,37 +0,0 @@
|
|
1
|
-
class RemoteTable
|
2
|
-
module Html
|
3
|
-
def each_row(&block)
|
4
|
-
backup_file!
|
5
|
-
convert_file_to_utf8!
|
6
|
-
remove_useless_characters!
|
7
|
-
html_headers = (headers.is_a?(Array)) ? headers : nil
|
8
|
-
Nokogiri::HTML(unescaped_html_without_soft_hyphens, nil, 'UTF-8').xpath(row_xpath).each do |row|
|
9
|
-
values = row.xpath(column_xpath).map { |td| td.content.gsub(/\s+/, ' ').strip }
|
10
|
-
if html_headers.nil?
|
11
|
-
html_headers = values
|
12
|
-
next
|
13
|
-
end
|
14
|
-
hash = zip html_headers, values
|
15
|
-
yield hash if keep_blank_rows or hash.any? { |k, v| v.present? }
|
16
|
-
end
|
17
|
-
ensure
|
18
|
-
restore_file!
|
19
|
-
end
|
20
|
-
|
21
|
-
private
|
22
|
-
|
23
|
-
# http://snippets.dzone.com/posts/show/406
|
24
|
-
def zip(keys, values)
|
25
|
-
hash = Hash.new
|
26
|
-
keys.zip(values) { |k,v| hash[k]=v }
|
27
|
-
hash
|
28
|
-
end
|
29
|
-
|
30
|
-
# should we be doing this in ruby?
|
31
|
-
def unescaped_html_without_soft_hyphens
|
32
|
-
str = CGI.unescapeHTML IO.read(path)
|
33
|
-
str.gsub! /&shy;/, ''
|
34
|
-
str
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
@@ -1,44 +0,0 @@
|
|
1
|
-
class RemoteTable
|
2
|
-
module RooSpreadsheet
|
3
|
-
def each_row(&block)
|
4
|
-
oo = roo_klass.new(path, nil, :ignore)
|
5
|
-
oo.default_sheet = sheet.is_a?(Numeric) ? oo.sheets[sheet] : sheet
|
6
|
-
column_references = Hash.new
|
7
|
-
if headers == false
|
8
|
-
# zero-based numeric keys
|
9
|
-
for col in (1..oo.last_column)
|
10
|
-
column_references[col] = col - 1
|
11
|
-
end
|
12
|
-
elsif headers.is_a? Array
|
13
|
-
# names
|
14
|
-
for col in (1..oo.last_column)
|
15
|
-
column_references[col] = headers[col - 1]
|
16
|
-
end
|
17
|
-
else
|
18
|
-
# read headers from the file itself
|
19
|
-
for col in (1..oo.last_column)
|
20
|
-
column_references[col] = oo.cell(header_row, col)
|
21
|
-
column_references[col] = oo.cell(header_row - 1, col) if column_references[col].blank? # look up
|
22
|
-
end
|
23
|
-
end
|
24
|
-
first_data_row.upto(oo.last_row) do |raw_row|
|
25
|
-
ordered_hash = ActiveSupport::OrderedHash.new
|
26
|
-
for col in (1..oo.last_column)
|
27
|
-
next if column_references[col].blank?
|
28
|
-
ordered_hash[column_references[col]] = oo.cell(raw_row, col).to_s.gsub(/<[^>]+>/, '').strip
|
29
|
-
end
|
30
|
-
yield ordered_hash if keep_blank_rows or ordered_hash.any? { |k, v| v.present? }
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
private
|
35
|
-
|
36
|
-
def header_row
|
37
|
-
1 + skip.to_i
|
38
|
-
end
|
39
|
-
|
40
|
-
def first_data_row
|
41
|
-
1 + header_row
|
42
|
-
end
|
43
|
-
end
|
44
|
-
end
|
data/lib/remote_table/file.rb
DELETED
@@ -1,100 +0,0 @@
|
|
1
|
-
class RemoteTable
|
2
|
-
class File
|
3
|
-
attr_accessor :filename, :format, :delimiter, :skip, :cut, :crop, :sheet, :headers, :schema, :schema_name, :trap
|
4
|
-
attr_accessor :encoding
|
5
|
-
attr_accessor :path
|
6
|
-
attr_accessor :keep_blank_rows
|
7
|
-
attr_accessor :row_xpath
|
8
|
-
attr_accessor :column_xpath
|
9
|
-
|
10
|
-
def initialize(bus)
|
11
|
-
@filename = bus[:filename]
|
12
|
-
@format = bus[:format] || format_from_filename
|
13
|
-
@delimiter = bus[:delimiter]
|
14
|
-
@sheet = bus[:sheet] || 0
|
15
|
-
@skip = bus[:skip] # rows
|
16
|
-
@keep_blank_rows = bus[:keep_blank_rows] || false
|
17
|
-
@crop = bus[:crop] # rows
|
18
|
-
@cut = bus[:cut] # columns
|
19
|
-
@headers = bus[:headers]
|
20
|
-
@schema = bus[:schema]
|
21
|
-
@schema_name = bus[:schema_name]
|
22
|
-
@trap = bus[:trap]
|
23
|
-
@encoding = bus[:encoding] || 'UTF-8'
|
24
|
-
@row_xpath = bus[:row_xpath]
|
25
|
-
@column_xpath = bus[:column_xpath]
|
26
|
-
extend "RemoteTable::#{format.to_s.camelcase}".constantize
|
27
|
-
end
|
28
|
-
|
29
|
-
def tabulate(path)
|
30
|
-
define_fixed_width_schema! if format == :fixed_width and schema.is_a?(Array) # TODO move to generic subclass callback
|
31
|
-
self.path = path
|
32
|
-
self
|
33
|
-
end
|
34
|
-
|
35
|
-
private
|
36
|
-
|
37
|
-
# doesn't support trap
|
38
|
-
def define_fixed_width_schema!
|
39
|
-
raise "can't define both schema_name and schema" if !schema_name.blank?
|
40
|
-
self.schema_name = "autogenerated_#{filename.gsub(/[^a-z0-9_]/i, '')}".to_sym
|
41
|
-
self.trap ||= lambda { |_| true }
|
42
|
-
Slither.define schema_name do |d|
|
43
|
-
d.rows do |row|
|
44
|
-
row.trap(&trap)
|
45
|
-
schema.each do |name, width, options|
|
46
|
-
if name == 'spacer'
|
47
|
-
row.spacer width
|
48
|
-
else
|
49
|
-
row.column name, width, options
|
50
|
-
end
|
51
|
-
end
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
def backup_file!
|
57
|
-
FileUtils.cp path, "#{path}.backup"
|
58
|
-
end
|
59
|
-
|
60
|
-
def skip_rows!
|
61
|
-
return unless skip
|
62
|
-
RemoteTable.bang path, "tail -n +#{skip + 1}"
|
63
|
-
end
|
64
|
-
|
65
|
-
USELESS_CHARACTERS = [
|
66
|
-
'\xef\xbb\xbf', # UTF-8 byte order mark
|
67
|
-
'\xc2\xad' # soft hyphen, often inserted by MS Office (html: &shy;)
|
68
|
-
]
|
69
|
-
def remove_useless_characters!
|
70
|
-
RemoteTable.bang path, "perl -pe 's/#{USELESS_CHARACTERS.join '//g; s/'}//g'"
|
71
|
-
end
|
72
|
-
|
73
|
-
def convert_file_to_utf8!
|
74
|
-
RemoteTable.bang path, "iconv -c -f #{Escape.shell_single_word encoding} -t UTF-8"
|
75
|
-
end
|
76
|
-
|
77
|
-
def restore_file!
|
78
|
-
FileUtils.mv "#{path}.backup", path if ::File.readable? "#{path}.backup"
|
79
|
-
end
|
80
|
-
|
81
|
-
def cut_columns!
|
82
|
-
return unless cut
|
83
|
-
RemoteTable.bang path, "cut -c #{Escape.shell_single_word cut.to_s}"
|
84
|
-
end
|
85
|
-
|
86
|
-
def crop_rows!
|
87
|
-
return unless crop
|
88
|
-
RemoteTable.bang path, "tail -n +#{Escape.shell_single_word crop.first.to_s} | head -n #{crop.last - crop.first + 1}"
|
89
|
-
end
|
90
|
-
|
91
|
-
def format_from_filename
|
92
|
-
extname = ::File.extname(filename).gsub('.', '')
|
93
|
-
return :csv if extname.blank?
|
94
|
-
format = [ :xls, :ods, :xlsx ].detect { |i| i == extname.to_sym }
|
95
|
-
format = :html if extname =~ /\Ahtm/
|
96
|
-
format = :csv if format.blank?
|
97
|
-
format
|
98
|
-
end
|
99
|
-
end
|
100
|
-
end
|