embulk-parser-flexml 0.0.2 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +54 -1
- data/embulk-parser-flexml.gemspec +1 -1
- data/lib/embulk/parser/flexml.rb +4 -4
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 88bcb05c13577d9578dba6cf39f3cb715b022a8e
|
4
|
+
data.tar.gz: 8ffb9d31f11f38bf02b439704a52d38339e2d5db
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0cefedddeb0dfcbed1818a7f954e7d37e106e2c19199548afa2ba279cb1a17514906bcd2006e203c1e1e747600291ae6747027fc7d813d1ed86db6d343e3bb63
|
7
|
+
data.tar.gz: 3c5b4a1569a2bf86f1af37e96d0a6bfcbb31c6506b983b8a4cd8fbe5a681c78bc76fd4aff2bbed620a6b2b7e361d3392174ddd8fa208a72bd915323007e85647
|
data/README.md
CHANGED
@@ -1,2 +1,55 @@
|
|
1
1
|
# embulk-parser-flexml
|
2
|
-
|
2
|
+
|
3
|
+
Parser plugin for [Embulk](https://github.com/embulk/embulk).
|
4
|
+
|
5
|
+
Flexible XML parser for Embulk. Reads data using XPath expressions and from element attributes.
|
6
|
+
|
7
|
+
* **Plugin type**: parser
|
8
|
+
* **Load all or nothing**: yes
|
9
|
+
* **Resume supported**: no
|
10
|
+
|
11
|
+
## Configuration
|
12
|
+
|
13
|
+
- **type**: specify this plugin as `flexml`.
|
14
|
+
- **root**: root node from which to start fetching entries, specified in *path/to/node* style (string, required)
|
15
|
+
- **schema**: specifies the attributes of the table and their data types (required)
|
16
|
+
- **name**: name of the attribute (string, required)
|
17
|
+
- **type**: type of the attribute (string, required)
|
18
|
+
- **attribute**: if specified, the value of this attribute will be the output; otherwise the child element will be the output (string, optional)
|
19
|
+
- **xpath**: child element to select; if omitted, the entry element itself is used (string, optional)
|
20
|
+
- **format**: timestamp format to parse (string, required when type is `timestamp`)
|
21
|
+
- **timezone**: timestamp will be parsed in this timezone (string, optional)
|
22
|
+
|
23
|
+
|
24
|
+
## Example
|
25
|
+
|
26
|
+
### Configuration
|
27
|
+
|
28
|
+
```yaml
|
29
|
+
parser:
|
30
|
+
type: flexml
|
31
|
+
root: Team/Players/Player
|
32
|
+
schema:
|
33
|
+
- { name: name, type: string, attribute: name }
|
34
|
+
- { name: age, type: long, attribute: age }
|
35
|
+
- { name: about, type: string, xpath: About }
|
36
|
+
- { name: facebook, type: string, xpath: "SocialMedia[@type='facebook']", attribute: url }
|
37
|
+
- { name: twitter, type: string, xpath: "SocialMedia[@type='twitter']", attribute: url }
|
38
|
+
```
|
39
|
+
|
40
|
+
### XML
|
41
|
+
|
42
|
+
```xml
|
43
|
+
<?xml version="1.0" encoding="utf-8" standalone="no"?>
|
44
|
+
<Team>
|
45
|
+
<Players>
|
46
|
+
<Player name="Locatelli" age="23">
|
47
|
+
<About>
|
48
|
+
Manuel Locatelli Cavaliere OMRI (born 8 January 1998) is an Italian professional footballer who plays as a midfielder for Serie A club Juventus, on loan from Serie A club Sassuolo, and the Italy national team.
|
49
|
+
</About>
|
50
|
+
<SocialMedia type="facebook" url="https://www.facebook.com/locamanuel73"/>
|
51
|
+
<SocialMedia type="twitter" url="https://twitter.com/locamanuel73"/>
|
52
|
+
</Player>
|
53
|
+
</Players>
|
54
|
+
</Team>
|
55
|
+
```
|
@@ -3,7 +3,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |spec|
|
5
5
|
spec.name = "embulk-parser-flexml"
|
6
|
-
spec.version = "0.0.2"
|
6
|
+
spec.version = "0.2.1"
|
7
7
|
spec.authors = ["Surya Asriadie"]
|
8
8
|
spec.email = ["surya.asriadie@gmail.com"]
|
9
9
|
spec.summary = %q{Flexible Embulk parser plugin for XML}
|
data/lib/embulk/parser/flexml.rb
CHANGED
@@ -14,7 +14,7 @@ module Embulk
|
|
14
14
|
if s["type"] == "timestamp"
|
15
15
|
memo[s["name"]].merge!({
|
16
16
|
"format" => s["format"],
|
17
|
-
"timezone" => s["timezone"]
|
17
|
+
"timezone" => s["timezone"]
|
18
18
|
})
|
19
19
|
end
|
20
20
|
memo
|
@@ -38,8 +38,6 @@ module Embulk
|
|
38
38
|
values = @task[:schema].map do |f, c|
|
39
39
|
row = if c.has_key?("xpath")
|
40
40
|
XPath.first(e, c["xpath"])
|
41
|
-
elsif c.has_key?("element")
|
42
|
-
e.element(c["element"])
|
43
41
|
else
|
44
42
|
e
|
45
43
|
end
|
@@ -55,7 +53,7 @@ module Embulk
|
|
55
53
|
@page_builder.add(values)
|
56
54
|
end
|
57
55
|
rescue Exception => e
|
58
|
-
Embulk.logger.error e
|
56
|
+
Embulk.logger.error "Failed to parse xml: #{e.message}"
|
59
57
|
end
|
60
58
|
end
|
61
59
|
|
@@ -76,6 +74,8 @@ module Embulk
|
|
76
74
|
when "timestamp"
|
77
75
|
unless v.empty?
|
78
76
|
dest = Time.strptime(v, config["format"])
|
77
|
+
return dest.utc if config["timezone"].nil?
|
78
|
+
|
79
79
|
utc_offset = dest.utc_offset
|
80
80
|
zone_offset = Time.zone_offset(config["timezone"])
|
81
81
|
dest.localtime(zone_offset) + utc_offset - zone_offset
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-parser-flexml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Surya Asriadie
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-12-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|