embulk-parser-flexml 0.0.2 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 80db449622d6691e3e02d13c97d749af6d74376a
4
- data.tar.gz: 165ea230d03e9d226218080b8a4dbd2576b58b73
3
+ metadata.gz: 88bcb05c13577d9578dba6cf39f3cb715b022a8e
4
+ data.tar.gz: 8ffb9d31f11f38bf02b439704a52d38339e2d5db
5
5
  SHA512:
6
- metadata.gz: 96315c75ad168d16b595182e726a0a53e1bf9082452351bd67870345ffec23ee46b550e625667b39b8712026ddbf5d4c48279f4dd2eb83bcbf693333b33cbf9b
7
- data.tar.gz: 6fe4566d7f0795964934ba87889a37f3583eba5dc372398f037da816bc5ac54ec1b06f20215f22e8982534ba0d82f0dedc242b9c56ace534825aa39c70d24a34
6
+ metadata.gz: 0cefedddeb0dfcbed1818a7f954e7d37e106e2c19199548afa2ba279cb1a17514906bcd2006e203c1e1e747600291ae6747027fc7d813d1ed86db6d343e3bb63
7
+ data.tar.gz: 3c5b4a1569a2bf86f1af37e96d0a6bfcbb31c6506b983b8a4cd8fbe5a681c78bc76fd4aff2bbed620a6b2b7e361d3392174ddd8fa208a72bd915323007e85647
data/README.md CHANGED
@@ -1,2 +1,55 @@
1
1
  # embulk-parser-flexml
2
- Flexible xml parser for embulk - supports xpath and attributes
2
+
3
+ Parser plugin for [Embulk](https://github.com/embulk/embulk).
4
+
5
+ Flexible XML parser for Embulk. Reads data using XPath expressions and element attributes.
6
+
7
+ * **Plugin type**: parser
8
+ * **Load all or nothing**: yes
9
+ * **Resume supported**: no
10
+
11
+ ## Configuration
12
+
13
+ - **type**: specify this plugin as `flexml`.
14
+ - **root**: root path from which each entry is fetched, specified in *path/to/node* style (string, required)
15
+ - **schema**: specify the attributes of the table and their data types (required)
16
+ - **name**: name of the attribute (string, required)
17
+ - **type**: type of the attribute (string, required)
18
+ - **attribute**: if specified, value of this attribute will be the output, otherwise child will be the output (string, optional)
19
+ - **xpath**: child element to select; if omitted, the entry element itself is used (string, optional)
20
+ - **format**: timestamp format to parse (string, required when type is `timestamp`)
21
+ - **timezone**: timestamp will be parsed in this timezone (string, optional)
22
+
23
+
24
+ ## Example
25
+
26
+ ### Configuration
27
+
28
+ ```yaml
29
+ parser:
30
+ type: flexml
31
+ root: Team/Players/Player
32
+ schema:
33
+ - { name: name, type: string, attribute: name }
34
+ - { name: age, type: long, attribute: age }
35
+ - { name: about, type: string, xpath: About }
36
+ - { name: facebook, type: string, xpath: "SocialMedia[@type='facebook']", attribute: url }
37
+ - { name: twitter, type: string, xpath: "SocialMedia[@type='twitter']", attribute: url }
38
+ ```
39
+
40
+ ### XML
41
+
42
+ ```xml
43
+ <?xml version="1.0" encoding="utf-8" standalone="no"?>
44
+ <Team>
45
+ <Players>
46
+ <Player name="Locatelli" age="23">
47
+ <About>
48
+ Manuel Locatelli Cavaliere OMRI (born 8 January 1998) is an Italian professional footballer who plays as a midfielder for Serie A club Juventus, on loan from Serie A club Sassuolo, and the Italy national team.
49
+ </About>
50
+ <SocialMedia type="facebook" url="https://www.facebook.com/locamanuel73"/>
51
+ <SocialMedia type="twitter" url="https://twitter.com/locamanuel73"/>
52
+ </Player>
53
+ </Players>
54
+ </Team>
55
+ ```
@@ -3,7 +3,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
3
 
4
4
  Gem::Specification.new do |spec|
5
5
  spec.name = "embulk-parser-flexml"
6
- spec.version = "0.0.2"
6
+ spec.version = "0.2.1"
7
7
  spec.authors = ["Surya Asriadie"]
8
8
  spec.email = ["surya.asriadie@gmail.com"]
9
9
  spec.summary = %q{Flexible Embulk parser plugin for XML}
@@ -14,7 +14,7 @@ module Embulk
14
14
  if s["type"] == "timestamp"
15
15
  memo[s["name"]].merge!({
16
16
  "format" => s["format"],
17
- "timezone" => s["timezone"] || "+0900"
17
+ "timezone" => s["timezone"]
18
18
  })
19
19
  end
20
20
  memo
@@ -38,8 +38,6 @@ module Embulk
38
38
  values = @task[:schema].map do |f, c|
39
39
  row = if c.has_key?("xpath")
40
40
  XPath.first(e, c["xpath"])
41
- elsif c.has_key?("element")
42
- e.element(c["element"])
43
41
  else
44
42
  e
45
43
  end
@@ -55,7 +53,7 @@ module Embulk
55
53
  @page_builder.add(values)
56
54
  end
57
55
  rescue Exception => e
58
- Embulk.logger.error e
56
+ Embulk.logger.error "Failed to parse xml: #{e.message}"
59
57
  end
60
58
  end
61
59
 
@@ -76,6 +74,8 @@ module Embulk
76
74
  when "timestamp"
77
75
  unless v.empty?
78
76
  dest = Time.strptime(v, config["format"])
77
+ return dest.utc if config["timezone"].nil?
78
+
79
79
  utc_offset = dest.utc_offset
80
80
  zone_offset = Time.zone_offset(config["timezone"])
81
81
  dest.localtime(zone_offset) + utc_offset - zone_offset
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-parser-flexml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Surya Asriadie
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-11-15 00:00:00.000000000 Z
11
+ date: 2021-12-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement