embulk-parser-flexml 0.0.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +54 -1
- data/embulk-parser-flexml.gemspec +2 -2
- data/lib/embulk/parser/flexml.rb +4 -4
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b76e551255f6f521fd1f4523bc51b996b55f9e27
|
4
|
+
data.tar.gz: 0e90b31b02642200bc23010cd87486968c752492
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 93dd096b3c715506298bf007c9b68177182071d49da644d1519811a82af2e1649ff263ea3d5da0da7914b802f79ee0184a1565f86926fa7a5735966521002479
|
7
|
+
data.tar.gz: 2e37018a51524203ff8d9011627e9133b55b8fcd3c6fc24bff415ad844ee9bf1922042e5a01a288a67af5fae801fb4b12baec83a5515a6b947847a98610d8d44
|
data/README.md
CHANGED
@@ -1,2 +1,55 @@
|
|
1
1
|
# embulk-parser-flexml
|
2
|
-
|
2
|
+
|
3
|
+
Parser plugin for [Embulk](https://github.com/embulk/embulk).
|
4
|
+
|
5
|
+
Flexible XML parser for Embulk. Reads data using XPath expressions and element attributes.
|
6
|
+
|
7
|
+
* **Plugin type**: parser
|
8
|
+
* **Load all or nothing**: yes
|
9
|
+
* **Resume supported**: no
|
10
|
+
|
11
|
+
## Configuration
|
12
|
+
|
13
|
+
- **type**: specify this plugin as `flexml`.
|
14
|
+
- **root**: path to the nodes from which each entry is fetched, specified in *path/to/node* style (string, required)
|
15
|
+
- **schema**: specifies the attributes (columns) of the table and their data types (required)
|
16
|
+
- **name**: name of the attribute (string, required)
|
17
|
+
- **type**: type of the attribute (string, required)
|
18
|
+
- **attribute**: if specified, the value of this attribute will be the output; otherwise the child element will be the output (string, optional)
|
19
|
+
- **xpath**: child element to select; if omitted, the entry node itself is used (string, optional)
|
20
|
+
- **format**: timestamp format to parse (string, required when type is `timestamp`)
|
21
|
+
- **timezone**: timestamp will be parsed in this timezone (string, optional)
|
22
|
+
|
23
|
+
|
24
|
+
## Example
|
25
|
+
|
26
|
+
### Configuration
|
27
|
+
|
28
|
+
```yaml
|
29
|
+
parser:
|
30
|
+
type: flexml
|
31
|
+
root: Team/Players/Player
|
32
|
+
schema:
|
33
|
+
- { name: name, type: string, attribute: name }
|
34
|
+
- { name: age, type: long, attribute: age }
|
35
|
+
- { name: about, type: string, xpath: About }
|
36
|
+
- { name: facebook, type: string, xpath: "SocialMedia[@type='facebook']", attribute: url }
|
37
|
+
- { name: twitter, type: string, xpath: "SocialMedia[@type='twitter']", attribute: url }
|
38
|
+
```
|
39
|
+
|
40
|
+
### XML
|
41
|
+
|
42
|
+
```xml
|
43
|
+
<?xml version="1.0" encoding="utf-8" standalone="no"?>
|
44
|
+
<Team>
|
45
|
+
<Players>
|
46
|
+
<Player name="Locatelli" age="23">
|
47
|
+
<About>
|
48
|
+
Manuel Locatelli Cavaliere OMRI (born 8 January 1998) is an Italian professional footballer who plays as a midfielder for Serie A club Juventus, on loan from Serie A club Sassuolo, and the Italy national team.
|
49
|
+
</About>
|
50
|
+
<SocialMedia type="facebook" url="https://www.facebook.com/locamanuel73"/>
|
51
|
+
<SocialMedia type="twitter" url="https://twitter.com/locamanuel73"/>
|
52
|
+
</Player>
|
53
|
+
</Players>
|
54
|
+
</Team>
|
55
|
+
```
|
@@ -3,7 +3,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |spec|
|
5
5
|
spec.name = "embulk-parser-flexml"
|
6
|
-
spec.version = "0.0
|
6
|
+
spec.version = "0.2.0"
|
7
7
|
spec.authors = ["Surya Asriadie"]
|
8
8
|
spec.email = ["surya.asriadie@gmail.com"]
|
9
9
|
spec.summary = %q{Flexible Embulk parser plugin for XML}
|
@@ -15,7 +15,7 @@ Gem::Specification.new do |spec|
|
|
15
15
|
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
16
16
|
spec.require_paths = ["lib"]
|
17
17
|
|
18
|
-
spec.add_dependency "rexml", "~> 3.1"
|
18
|
+
spec.add_dependency "rexml", "~> 3.1.9"
|
19
19
|
spec.add_development_dependency "bundler", "~> 1.0"
|
20
20
|
spec.add_development_dependency 'embulk', ['>= 0.9.15']
|
21
21
|
spec.add_development_dependency "rake", "~> 10.0"
|
data/lib/embulk/parser/flexml.rb
CHANGED
@@ -14,7 +14,7 @@ module Embulk
|
|
14
14
|
if s["type"] == "timestamp"
|
15
15
|
memo[s["name"]].merge!({
|
16
16
|
"format" => s["format"],
|
17
|
-
"timezone" => s["timezone"]
|
17
|
+
"timezone" => s["timezone"]
|
18
18
|
})
|
19
19
|
end
|
20
20
|
memo
|
@@ -38,8 +38,6 @@ module Embulk
|
|
38
38
|
values = @task[:schema].map do |f, c|
|
39
39
|
row = if c.has_key?("xpath")
|
40
40
|
XPath.first(e, c["xpath"])
|
41
|
-
elsif c.has_key?("element")
|
42
|
-
e.element(c["element"])
|
43
41
|
else
|
44
42
|
e
|
45
43
|
end
|
@@ -55,7 +53,7 @@ module Embulk
|
|
55
53
|
@page_builder.add(values)
|
56
54
|
end
|
57
55
|
rescue Exception => e
|
58
|
-
|
56
|
+
Embulk.logger.error "Failed to parse xml: #{e.message}"
|
59
57
|
end
|
60
58
|
end
|
61
59
|
|
@@ -76,6 +74,8 @@ module Embulk
|
|
76
74
|
when "timestamp"
|
77
75
|
unless v.empty?
|
78
76
|
dest = Time.strptime(v, config["format"])
|
77
|
+
return dest.utc if config["timezone"].empty?
|
78
|
+
|
79
79
|
utc_offset = dest.utc_offset
|
80
80
|
zone_offset = Time.zone_offset(config["timezone"])
|
81
81
|
dest.localtime(zone_offset) + utc_offset - zone_offset
|
metadata
CHANGED
@@ -1,21 +1,21 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-flexml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Surya Asriadie
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-12-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
15
15
|
requirements:
|
16
16
|
- - "~>"
|
17
17
|
- !ruby/object:Gem::Version
|
18
|
-
version:
|
18
|
+
version: 3.1.9
|
19
19
|
name: rexml
|
20
20
|
prerelease: false
|
21
21
|
type: :runtime
|
@@ -23,7 +23,7 @@ dependencies:
|
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
26
|
+
version: 3.1.9
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
requirement: !ruby/object:Gem::Requirement
|
29
29
|
requirements:
|