embulk-parser-xml 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/embulk-parser-xml.gemspec +2 -1
- data/lib/embulk/parser/xml.rb +90 -33
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 31b1d69fab1c3bfcf1e676508a8f7e8c644db85e
|
4
|
+
data.tar.gz: 86d1162369def6860f2471c008b0842469db303a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2f81442fd1695ccb607a1f056a190432a3ed94b146a4650cbcf2d9d6ebe10e716a2068e41860ee18a275c36223b1bec51ce3bca4718700229079d452d069ae64
|
7
|
+
data.tar.gz: 6e8c4e50183802026bddab9eea6b298ff2f06af6ac8a773cbf0ae3afda50feaa095127163052c5aac648cd1364ef6d899f96208ef2b10f83674feba04c6c5422
|
data/embulk-parser-xml.gemspec
CHANGED
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
|
5
5
|
Gem::Specification.new do |spec|
|
6
6
|
spec.name = "embulk-parser-xml"
|
7
|
-
spec.version = "0.0.1"
|
7
|
+
spec.version = "0.0.2"
|
8
8
|
spec.authors = ["Takuma kanari"]
|
9
9
|
spec.email = ["chemtrails.t@gmail.com"]
|
10
10
|
spec.summary = %q{Embulk parser plugin for XML}
|
@@ -16,6 +16,7 @@ Gem::Specification.new do |spec|
|
|
16
16
|
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
17
17
|
spec.require_paths = ["lib"]
|
18
18
|
|
19
|
+
spec.add_dependency "nokogiri", "~> 1.6"
|
19
20
|
spec.add_development_dependency "bundler", "~> 1.0"
|
20
21
|
spec.add_development_dependency "rake", "~> 10.0"
|
21
22
|
end
|
data/lib/embulk/parser/xml.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require "
|
1
|
+
require "nokogiri"
|
2
2
|
|
3
3
|
module Embulk
|
4
4
|
module Parser
|
@@ -7,54 +7,111 @@ module Embulk
|
|
7
7
|
Plugin.register_parser("xml", self)
|
8
8
|
|
9
9
|
def self.transaction(config, &control)
|
10
|
+
schema = config.param("schema", :array)
|
11
|
+
schema_serialized = schema.inject({}) do |memo, s|
|
12
|
+
memo[s["name"]] = s["type"]
|
13
|
+
memo
|
14
|
+
end
|
10
15
|
task = {
|
11
|
-
:schema =>
|
12
|
-
:
|
16
|
+
:schema => schema_serialized,
|
17
|
+
:root_to_route => config.param("root", :string).split("/")
|
13
18
|
}
|
14
|
-
columns =
|
19
|
+
columns = schema.each_with_index.map do |c, i|
|
15
20
|
Column.new(i, c["name"], c["type"].to_sym)
|
16
21
|
end
|
17
22
|
yield(task, columns)
|
18
23
|
end
|
19
24
|
|
20
25
|
def run(file_input)
|
21
|
-
|
22
|
-
|
26
|
+
on_new_record = lambda {|record|
|
27
|
+
@page_builder.add(record)
|
28
|
+
}
|
29
|
+
doc = RecordBinder.new(@task["root_to_route"],
|
30
|
+
@task["schema"], on_new_record)
|
31
|
+
parser = Nokogiri::XML::SAX::Parser.new(doc)
|
23
32
|
while file = file_input.next_file
|
24
|
-
|
25
|
-
|
26
|
-
e.elements.each do |d|
|
27
|
-
dest[d.name] = d.text
|
28
|
-
end
|
29
|
-
@page_builder.add(make_record(schema, dest))
|
30
|
-
end
|
33
|
+
parser.parse(file.read)
|
34
|
+
doc.clear
|
31
35
|
end
|
32
36
|
@page_builder.finish
|
33
37
|
end
|
38
|
+
end
|
34
39
|
|
35
|
-
|
40
|
+
class RecordBinder < Nokogiri::XML::SAX::Document
|
36
41
|
|
37
|
-
def
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
42
|
+
def initialize(route, schema, on_new_record)
|
43
|
+
@route = route
|
44
|
+
@schema = schema
|
45
|
+
@on_new_record = on_new_record
|
46
|
+
clear
|
47
|
+
super()
|
48
|
+
end
|
49
|
+
|
50
|
+
def clear
|
51
|
+
@find_route_idx = 0
|
52
|
+
@enter = false
|
53
|
+
@current_element_name = nil
|
54
|
+
@current_data = new_map_by_schema
|
55
|
+
end
|
56
|
+
|
57
|
+
def start_element(name, attributes = [])
|
58
|
+
if !@enter
|
59
|
+
if name == @route[@find_route_idx]
|
60
|
+
if @find_route_idx == @route.size - 1
|
61
|
+
@enter = true
|
55
62
|
else
|
56
|
-
|
63
|
+
@find_route_idx += 1
|
64
|
+
end
|
57
65
|
end
|
66
|
+
else
|
67
|
+
@current_element_name = (@schema[name].nil?) ? nil : name
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
def characters(string)
|
72
|
+
return if !@enter || string.strip.size == 0 || @current_element_name.nil?
|
73
|
+
val = @current_data[@current_element_name]
|
74
|
+
val = "" if val.nil?
|
75
|
+
val += string
|
76
|
+
@current_data[@current_element_name] = val
|
77
|
+
end
|
78
|
+
|
79
|
+
def end_element(name, attributes = [])
|
80
|
+
if @enter
|
81
|
+
if name == @route.last
|
82
|
+
@enter = false
|
83
|
+
@on_new_record.call(@current_data.map{|k, v| v})
|
84
|
+
@current_data = new_map_by_schema
|
85
|
+
elsif !@current_element_name.nil?
|
86
|
+
@current_data[name] = convert(@current_data[name], @schema[name])
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
private
|
92
|
+
|
93
|
+
def new_map_by_schema
|
94
|
+
@schema.keys.inject({}) do |memo, k|
|
95
|
+
memo[k] = nil
|
96
|
+
memo
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
def convert(val, type)
|
101
|
+
v = val.nil? ? "" : val
|
102
|
+
case type
|
103
|
+
when "string"
|
104
|
+
v
|
105
|
+
when "long"
|
106
|
+
v.to_i
|
107
|
+
when "double"
|
108
|
+
v.to_f
|
109
|
+
when "boolean"
|
110
|
+
["yes", "true", "1"].include?(v.downcase)
|
111
|
+
when "timestamp"
|
112
|
+
v.empty? ? nil : Time.strptime(v, c["format"])
|
113
|
+
else
|
114
|
+
raise "Unsupported type #{type}"
|
58
115
|
end
|
59
116
|
end
|
60
117
|
end
|
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-xml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Takuma kanari
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-04-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.6'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.6'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: bundler
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|