csvkit 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/csvkit/csvkit.rb +27 -8
- data/lib/csvkit/version.rb +1 -1
- metadata +2 -2
data/lib/csvkit/csvkit.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
class CSVKit
|
2
2
|
require 'nokogiri'
|
3
|
+
require 'iconv'
|
3
4
|
|
4
5
|
attr_accessor :content
|
5
6
|
|
@@ -15,17 +16,35 @@ class CSVKit
|
|
15
16
|
|
16
17
|
doc = Nokogiri::HTML(@content)
|
17
18
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
19
|
+
tsv_str = CSV.generate(col_sep: "\t", headers: :first_row, encoding: 'utf-8') do |tsv|
|
20
|
+
doc.xpath('//table//tr').each do |row|
|
21
|
+
tsv_row = []
|
22
|
+
row.xpath('td | th').each do |cell|
|
23
|
+
tsv_row << clean_cell_string(cell.text)
|
24
|
+
end
|
25
|
+
tsv << tsv_row
|
23
26
|
end
|
24
|
-
result += "\n"
|
25
27
|
end
|
26
28
|
|
27
|
-
raise "command failed
|
28
|
-
|
29
|
+
raise "command failed" if tsv_str.to_s.strip.empty?
|
30
|
+
|
31
|
+
write_content = Iconv.conv("utf-16le", "utf-8", "\xEF\xBB\xBF")
|
32
|
+
write_content += Iconv.conv("utf-16le", "utf-8", tsv_str)
|
33
|
+
write_content
|
34
|
+
return write_content
|
29
35
|
end
|
30
36
|
|
37
|
+
# Normalizes the text of a single table cell before it is written to the
# TSV output.
#
# Steps, in order:
#   1. every whitespace character (including newlines, tabs and Unicode
#      spaces matched by [[:space:]]) becomes a plain space;
#   2. each double quote is prefixed with a backslash;
#   3. runs of spaces collapse to a single space;
#   4. commas and dollar signs are removed (so "$1,234" reads as "1234");
#   5. leading/trailing spaces are stripped.
#
# @param cell_string [String] raw cell text
# @return [Integer, Float, String] an Integer when the cleaned text is all
#   digits, a Float when it otherwise parses as a number, else the cleaned
#   String
def clean_cell_string(cell_string)
  cleaned = cell_string.gsub(/[[:space:]]/, ' ')
  # NOTE(review): the CSV writer presumably applies its own quoting as well;
  # confirm that backslash-escaping here is still wanted.
  cleaned = cleaned.gsub('"', '\"')
  # After step 1 the only whitespace left is ' ', so squeeze is sufficient.
  cleaned = cleaned.squeeze(' ')
  cleaned = cleaned.delete(',$')
  cleaned = cleaned.strip
  return cleaned unless is_numeric?(cleaned)

  # Keep whole numbers as Integers so "123" is not emitted as "123.0".
  cleaned =~ /\A\d+\z/ ? cleaned.to_i : Float(cleaned)
end

# Reports whether the given value looks like a number.
#
# @param string [String, nil] text to inspect
# @return [Boolean] true when the text is all digits or is accepted by
#   Kernel#Float; false otherwise (including nil and the empty string)
def is_numeric?(string)
  # \A / \z anchor the whole string; ^ / $ would match any single line.
  return true if string.to_s =~ /\A\d+\z/

  Float(string)
  true
rescue ArgumentError, TypeError
  # Float raises ArgumentError for non-numeric text and TypeError for nil.
  false
end
|
31
50
|
end
|
data/lib/csvkit/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csvkit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-12-
|
12
|
+
date: 2012-12-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|