harmonized_tariff 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +18 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +27 -0
- data/LICENSE.txt +22 -0
- data/README.md +50 -0
- data/Rakefile +1 -0
- data/bin/hts +4 -0
- data/bin/rake +16 -0
- data/data/1300_HTS_delimited.txt +28920 -0
- data/data/hts.gz +0 -0
- data/data/hts.json +1 -0
- data/data/hts.sql +47285 -0
- data/data/hts.xml +305949 -0
- data/harmonized_tariff.gemspec +27 -0
- data/lib/harmonized_tariff.rb +6 -0
- data/lib/harmonized_tariff/cli.rb +105 -0
- data/lib/harmonized_tariff/convert.rb +184 -0
- data/lib/harmonized_tariff/version.rb +3 -0
- metadata +146 -0
@@ -0,0 +1,27 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'harmonized_tariff/version'
|
5
|
+
|
6
|
+
# Packaging metadata for the harmonized_tariff gem. The version comes from
# HarmonizedTariff::VERSION, which the require above this block loads.
Gem::Specification.new do |gem|
  # Identity and authorship.
  gem.name        = "harmonized_tariff"
  gem.version     = HarmonizedTariff::VERSION
  gem.authors     = ["Todd Treece"]
  gem.email       = ["toddtreece@gmail.com"]
  gem.description = "A utility for converting the Harmonized Tariff Schedule from hts.usitc.gov to JSON, SQL, and XML."
  gem.summary     = "Harmonized Tariff Schedule Converter"
  gem.homepage    = "http://github.com/toddtreece/harmonized_tariff"
  gem.license     = "MIT"

  # Package every file tracked by git. The order of the next three
  # assignments is kept as-is because test_files reads back gem.files.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = %w[hts]
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = %w[lib]

  # Needed only when working on the gem itself.
  gem.add_development_dependency "bundler", "~> 1.3"
  gem.add_development_dependency "rake"

  # Needed at runtime by the converter.
  %w[activesupport builder heredoc_unindent].each do |runtime_gem|
    gem.add_dependency runtime_gem
  end
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
require 'optparse/time'
|
3
|
+
require 'zlib'
|
4
|
+
|
5
|
+
module HarmonizedTariff

  # Command-line front end for HarmonizedTariff::Convert.
  #
  # Parses the command-line switches, loads the tariff schedule and writes the
  # converted result to "<destination>/hts.<type>".
  #
  # The selected options are kept in class variables, so they persist between
  # calls to CLI.parse within the same process.
  class CLI

    # Values accepted by --type (compared case-insensitively).
    TYPES = %w[json xml sql gz].freeze

    @@type = 'json'
    @@verbose = false
    @@destination = '~/'

    # Parses +args+ (an ARGV-style array, consumed in place), runs the
    # conversion and writes the output file.
    #
    # Raises OptionParser::ParseError subclasses for malformed switches,
    # including OptionParser::InvalidArgument for an unsupported --type value.
    # --help and --version print and then call exit.
    def self.parse(args)
      hts = HarmonizedTariff::Convert.new

      opt_parser = OptionParser.new do |opts|
        opts.banner = "Usage: bundle exec bin/hts [options]"

        opts.separator ""
        opts.separator "Specific options:"

        opts.on("-s", "--source [PATH]", "Set source file") do |source|
          hts.source = source || hts.source
        end

        opts.on("-o", "--output [PATH]", "Set destination folder path") do |path|
          # The argument is optional: keep the current destination when the
          # switch is given without a path.
          @@destination = path || @@destination
        end

        opts.on("--type [TYPE]", "Select output format (json, xml, sql, gz (gzipped sql))") do |t|
          # The argument is optional: keep the current type when omitted.
          next if t.nil?

          requested = t.downcase
          # Reject unknown formats up front instead of loading the whole
          # schedule and then writing nothing.
          raise OptionParser::InvalidArgument, t unless TYPES.include?(requested)

          @@type = requested
        end

        opts.on("-v", "--verbose", "Print output to screen") do |d|
          @@verbose = d
        end

        opts.separator ""
        opts.separator "Common options:"

        opts.on_tail("-h", "--help", "Show this message") do
          puts opts
          exit
        end

        opts.on_tail("--version", "Show version") do
          puts HarmonizedTariff::VERSION
          exit
        end
      end

      opt_parser.parse!(args)

      # parse the file
      hts.load

      case @@type
      when 'json' then output hts.toJSON
      when 'xml'  then output hts.toXML
      when 'sql'  then output hts.toSQL
      when 'gz'   then gzip hts.toSQL
      end
    end

    # Writes +data+ to the destination file and, when --verbose was given,
    # also prints it to standard output.
    def self.output(data)
      path = target_path

      puts 'Outputting converted ' + @@type.upcase + ' to: ' + path + "\n"

      # Block form closes the file even if the write raises.
      File.open(path, 'w') { |target| target.write data }

      puts data if @@verbose
    end

    # Writes +data+ gzip-compressed to the destination file.
    def self.gzip(data)
      path = target_path

      puts 'Outputting converted gzipped SQL to: ' + path + "\n"

      # GzipWriter.open opens the file in binary mode and closes both the
      # gzip stream and the file when the block returns.
      Zlib::GzipWriter.open(path) { |gz| gz.write data }
    end

    # Builds "<destination>/hts.<type>". File.expand_path is used (rather than
    # File.absolute_path) so that the default destination "~/" resolves to the
    # user's home directory for every output format.
    def self.target_path
      File.join(File.expand_path(@@destination), 'hts.' + @@type)
    end
    private_class_method :target_path

  end

end
|
105
|
+
|
@@ -0,0 +1,184 @@
|
|
1
|
+
require "csv"
|
2
|
+
require "json"
|
3
|
+
require "active_support/core_ext"
|
4
|
+
require "heredoc_unindent"
|
5
|
+
|
6
|
+
module HarmonizedTariff

  # Reads the delimited Harmonized Tariff Schedule file and converts it to
  # JSON, XML or SQL.
  #
  # Call #load first; it parses the source file into a tree of hashes. Each
  # node has the keys :number, :suffix, :description, :unit, :col1_rate,
  # :special_rate, :col2_rate and :note, plus :children (an array of nodes)
  # when the row has nested rows beneath it.
  class Convert

    # source    - path of the delimited input file
    # encoding  - encoding passed to CSV.read
    # delimeter - column separator passed to CSV.read (spelling kept for
    #             compatibility with existing callers)
    attr_accessor :source, :encoding, :delimeter

    # Node keys written to SQL, in the order of the INSERT column list.
    SQL_FIELDS = [:number, :suffix, :description, :unit,
                  :col1_rate, :special_rate, :col2_rate, :note].freeze

    # Defaults to the schedule file shipped inside the installed gem.
    def initialize
      gem_root = Gem::Specification.find_by_name("harmonized_tariff").gem_dir

      @source = File.join(gem_root, 'data', '1300_HTS_delimited.txt')
      @encoding = 'ISO8859-1'
      @delimeter = '|'
      @raw = []
    end

    # Returns the loaded tree as a JSON string.
    def toJSON
      @data.to_json
    end

    # Returns the loaded tree as an XML string with a <tariffs> root.
    def toXML
      @data.to_xml(:root => 'tariffs')
    end

    # Returns a SQL script: a CREATE TABLE statement followed by one INSERT
    # per numbered row, with each row's description prefixed by the
    # descriptions of its ancestors.
    def toSQL
      schema = <<-SQL.unindent
        -- create the table
        CREATE TABLE `harmonized_tariffs` (
          `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
          `code` varchar(255) NOT NULL DEFAULT '',
          `suffix` varchar(255) DEFAULT NULL,
          `description` text,
          `unit` varchar(255) DEFAULT NULL,
          `rate_1` varchar(255) DEFAULT NULL,
          `special_rate` varchar(255) DEFAULT '',
          `rate_2` varchar(255) DEFAULT NULL,
          `notes` text,
          PRIMARY KEY (`id`),
          KEY `tariff` (`code`)
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
        -- insert the flattened data
      SQL

      schema + @data.map { |node| buildSQL(node) }.join
    end

    # Returns the INSERT statements for +parent+ and all of its descendants.
    #
    # Descendants are emitted with a flattened description ("parent child").
    # The flattening is done on shallow copies, so the loaded tree is left
    # untouched and toSQL/toJSON/toXML can be called repeatedly, in any order.
    def buildSQL(parent)
      statements = []

      # Rows without a tariff number only contribute their description to
      # the rows beneath them.
      statements << sqlRow(parent) unless parent[:number].nil?

      (parent[:children] || []).each do |child|
        flattened = child.merge(:description => flattened_description(parent, child))
        statements << buildSQL(flattened)
      end

      statements.join
    end

    # Returns one INSERT statement (terminated by a newline) for +row+.
    # nil values become SQL NULL; everything else is escaped and quoted.
    def sqlRow(row)
      values = SQL_FIELDS.map do |field|
        value = row[field]
        value.nil? ? 'NULL' : "'" + escape(value) + "'"
      end

      "INSERT INTO `harmonized_tariffs` (`id`, `code`, `suffix`, `description`, `unit`, `rate_1`, `special_rate`, `rate_2`, `notes`)\n" \
      "VALUES (NULL, #{values.join(', ')});\n"
    end

    # Escapes +text+ for use inside a single-quoted SQL string literal.
    #
    # Backslashes are doubled first, so the backslash added in front of each
    # single quote is not itself re-escaped. Double quotes need no escaping
    # inside a single-quoted literal and are left alone.
    def escape(text)
      text.gsub("\\") { "\\\\" }.gsub("'") { "\\'" }
    end

    # Parses the source file and builds the tree used by the to* methods.
    # Returns nil.
    def load
      # quote_char is set to NUL so quote characters in the data are treated
      # as ordinary text rather than CSV quoting.
      @raw = CSV.read(@source, col_sep: @delimeter, encoding: @encoding, quote_char: "\x00")

      # Row 0 is skipped, so scanning starts at index 1.
      root = build_heirarchy({}, 0, 1)

      # An input without any level-0 rows yields an empty tree rather than nil.
      @data = root.fetch(:children, [])

      # The raw rows are no longer needed once the tree exists.
      @raw = nil
    end

    # Collects the rows at indent level +depth+, scanning @raw from +index+,
    # and attaches them to +parent+ as :children. Returns +parent+.
    #
    # Scanning stops at the first row whose level is shallower than +depth+.
    # Deeper rows are skipped here because the recursive call made for the
    # preceding matching row has already claimed them. Rows without a level
    # column (for example blank lines) are ignored.
    def build_heirarchy(parent, depth, index)
      siblings = []

      while index < @raw.length
        number, suffix, level, description, unit, col1_rate, special_rate, col2_rate, note = @raw[index]
        index += 1

        next if level.nil?

        level = level.to_i
        break if level < depth
        next if level > depth

        node = {
          :number => number,
          :suffix => suffix,
          :description => description,
          :unit => unit,
          :col1_rate => col1_rate,
          :special_rate => special_rate,
          :col2_rate => col2_rate,
          :note => note
        }

        # The rows directly after this one are its candidate children.
        siblings.push build_heirarchy(node, depth + 1, index)
      end

      parent[:children] = siblings unless siblings.empty?

      parent
    end

    private

    # Joins the parent's and the child's descriptions with a space, ignoring
    # missing ones. Returns nil when neither has a description.
    def flattened_description(parent, child)
      parts = [parent[:description], child[:description]].compact
      parts.empty? ? nil : parts.join(' ')
    end

  end

end
|
184
|
+
|
metadata
ADDED
@@ -0,0 +1,146 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: harmonized_tariff
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.4
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Todd Treece
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-05-01 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bundler
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '1.3'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '1.3'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rake
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: activesupport
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :runtime
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: builder
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: heredoc_unindent
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ! '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
86
|
+
type: :runtime
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
description: A utility for converting the Harmonized Tariff Schedule from hts.usitc.gov
|
95
|
+
to JSON, SQL, and XML.
|
96
|
+
email:
|
97
|
+
- toddtreece@gmail.com
|
98
|
+
executables:
|
99
|
+
- hts
|
100
|
+
extensions: []
|
101
|
+
extra_rdoc_files: []
|
102
|
+
files:
|
103
|
+
- .gitignore
|
104
|
+
- Gemfile
|
105
|
+
- Gemfile.lock
|
106
|
+
- LICENSE.txt
|
107
|
+
- README.md
|
108
|
+
- Rakefile
|
109
|
+
- bin/hts
|
110
|
+
- bin/rake
|
111
|
+
- data/1300_HTS_delimited.txt
|
112
|
+
- data/hts.gz
|
113
|
+
- data/hts.json
|
114
|
+
- data/hts.sql
|
115
|
+
- data/hts.xml
|
116
|
+
- harmonized_tariff.gemspec
|
117
|
+
- lib/harmonized_tariff.rb
|
118
|
+
- lib/harmonized_tariff/cli.rb
|
119
|
+
- lib/harmonized_tariff/convert.rb
|
120
|
+
- lib/harmonized_tariff/version.rb
|
121
|
+
homepage: http://github.com/toddtreece/harmonized_tariff
|
122
|
+
licenses:
|
123
|
+
- MIT
|
124
|
+
post_install_message:
|
125
|
+
rdoc_options: []
|
126
|
+
require_paths:
|
127
|
+
- lib
|
128
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
129
|
+
none: false
|
130
|
+
requirements:
|
131
|
+
- - ! '>='
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: '0'
|
134
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
135
|
+
none: false
|
136
|
+
requirements:
|
137
|
+
- - ! '>='
|
138
|
+
- !ruby/object:Gem::Version
|
139
|
+
version: '0'
|
140
|
+
requirements: []
|
141
|
+
rubyforge_project:
|
142
|
+
rubygems_version: 1.8.23
|
143
|
+
signing_key:
|
144
|
+
specification_version: 3
|
145
|
+
summary: Harmonized Tariff Schedule Converter
|
146
|
+
test_files: []
|