csvkit 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,6 @@
1
1
  class CSVKit
2
2
  require 'nokogiri'
3
+ require 'iconv'
3
4
 
4
5
  attr_accessor :content
5
6
 
@@ -15,17 +16,35 @@ class CSVKit
15
16
 
16
17
  doc = Nokogiri::HTML(@content)
17
18
 
18
- result = ""
19
-
20
- doc.xpath('//table//tr').each do |row|
21
- row.xpath('td').each do |cell|
22
- result += '"' + cell.text.gsub("\n", ' ').gsub('"', '\"').gsub(/(\s){2,}/m, '\1').gsub(/[\,\$]/, '') + "\", "
19
+ tsv_str = CSV.generate(col_sep: "\t", headers: :first_row, encoding: 'utf-8') do |tsv|
20
+ doc.xpath('//table//tr').each do |row|
21
+ tsv_row = []
22
+ row.xpath('td | th').each do |cell|
23
+ tsv_row << clean_cell_string(cell.text)
24
+ end
25
+ tsv << tsv_row
23
26
  end
24
- result += "\n"
25
27
  end
26
28
 
27
- raise "command failed: #{invoke}" if result.to_s.strip.empty?
28
- return result
29
+ raise "command failed" if tsv_str.to_s.strip.empty?
30
+
31
+ write_content = Iconv.conv("utf-16le", "utf-8", "\xEF\xBB\xBF")
32
+ write_content += Iconv.conv("utf-16le", "utf-8", tsv_str)
33
+ write_content
34
+ return write_content
29
35
  end
30
36
 
37
# Normalises the text of one table cell before it is appended to a TSV row.
#
# Steps, in order:
#   1. every run of whitespace (including newlines and tabs) becomes one space;
#   2. double quotes are prefixed with a backslash;
#   3. commas and dollar signs are removed;
#   4. leading/trailing whitespace is stripped.
#
# The result is always a String. The previous implementation tried to call
# String#to_b on numeric-looking cells; that method does not exist in core
# Ruby, so the branch could only ever raise. Because the value is written
# out as text straight away, numeric cells are returned as their cleaned
# text as well, which also keeps formatting such as leading zeros intact.
#
# NOTE(review): the caller feeds this value to a CSV writer, which already
# quotes and escapes fields itself, so the backslash added in step 2 likely
# ends up as a literal character in the output. Kept unchanged here; confirm
# whether it is still wanted.
#
# @param cell_string [String] raw text content of a table cell
# @return [String] cleaned cell text
def clean_cell_string(cell_string)
  cell_string
    .gsub(/[[:space:]]+/, ' ') # collapse before deleting characters, as the original did
    .gsub('"', '\"')
    .delete(',$')
    .strip
end

# Reports whether +string+ looks like a number.
#
# A value counts as numeric when it consists solely of decimal digits or
# when Kernel#Float can parse it. The check is performed on the argument;
# the previous implementation inspected +self+ by mistake and therefore
# never examined the given string.
#
# @param string [#to_s] candidate value (nil is treated as empty text)
# @return [Boolean] true when the value parses as a number, false otherwise
def is_numeric?(string)
  text = string.to_s
  # \A and \z anchor the whole string; ^ and $ would accept any digit-only line.
  return true if text =~ /\A\d+\z/

  begin
    Float(text)
    true
  rescue ArgumentError, TypeError
    false
  end
end
31
50
  end
@@ -1,3 +1,3 @@
1
1
  class CSVKit
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csvkit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-20 00:00:00.000000000 Z
12
+ date: 2012-12-21 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri