csvkit 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,6 @@
1
1
  class CSVKit
2
2
  require 'nokogiri'
3
+ require 'iconv'
3
4
 
4
5
  attr_accessor :content
5
6
 
@@ -15,17 +16,35 @@ class CSVKit
15
16
 
16
17
  doc = Nokogiri::HTML(@content)
17
18
 
18
- result = ""
19
-
20
- doc.xpath('//table//tr').each do |row|
21
- row.xpath('td').each do |cell|
22
- result += '"' + cell.text.gsub("\n", ' ').gsub('"', '\"').gsub(/(\s){2,}/m, '\1').gsub(/[\,\$]/, '') + "\", "
19
+ tsv_str = CSV.generate(col_sep: "\t", headers: :first_row, encoding: 'utf-8') do |tsv|
20
+ doc.xpath('//table//tr').each do |row|
21
+ tsv_row = []
22
+ row.xpath('td | th').each do |cell|
23
+ tsv_row << clean_cell_string(cell.text)
24
+ end
25
+ tsv << tsv_row
23
26
  end
24
- result += "\n"
25
27
  end
26
28
 
27
- raise "command failed: #{invoke}" if result.to_s.strip.empty?
28
- return result
29
+ raise "command failed" if tsv_str.to_s.strip.empty?
30
+
31
+ write_content = Iconv.conv("utf-16le", "utf-8", "\xEF\xBB\xBF")
32
+ write_content += Iconv.conv("utf-16le", "utf-8", tsv_str)
33
+ write_content
34
+ return write_content
29
35
  end
30
36
 
37
# Normalizes the text of a single table cell before it is written out.
#
# Steps, in order:
#   1. every run of whitespace (including Unicode spaces matched by
#      [[:space:]]) is collapsed to one ASCII space;
#   2. double quotes are prefixed with a backslash;
#   3. commas and dollar signs are removed;
#   4. leading/trailing whitespace is stripped.
#
# If the result looks numeric (see #is_numeric?) it is returned as a number:
# an Integer for plain (optionally signed) digit strings, a Float otherwise.
# The original code called String#to_b here, which is not defined by core
# Ruby and raised NoMethodError; a numeric conversion is presumably what was
# intended -- TODO confirm.
#
# @param cell_string [String] raw cell text
# @return [String, Integer, Float] cleaned text, or its numeric value
def clean_cell_string(cell_string)
  cleaned = cell_string.gsub(/[[:space:]]+/, ' ')
  # NOTE(review): this inserts a literal backslash before each quote. If the
  # caller feeds the value to a CSV writer that does its own quoting, the
  # backslash ends up in the output -- confirm this is wanted.
  cleaned = cleaned.gsub('"', '\"')
  cleaned = cleaned.gsub(/[\,\$]/, '').strip

  return cleaned unless is_numeric?(cleaned)

  cleaned =~ /\A[-+]?\d+\z/ ? cleaned.to_i : Float(cleaned)
end

# Reports whether +string+ can be read as a number.
#
# A string made only of digits is accepted directly; anything else is
# accepted if Kernel#Float can parse it. Anchors are \A/\z so that a
# multi-line value such as "12\nabc" is not mistaken for a number.
#
# @param string [String, nil] candidate text
# @return [Boolean] true when the text is numeric, false otherwise
def is_numeric?(string)
  return true if string =~ /\A\d+\z/

  Float(string)
  true
rescue ArgumentError, TypeError
  # ArgumentError: not a number; TypeError: nil or non-convertible input.
  false
end
31
50
  end
@@ -1,3 +1,3 @@
1
1
  class CSVKit
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csvkit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-20 00:00:00.000000000 Z
12
+ date: 2012-12-21 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri