embulk-parser-flexml 0.0.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 70dc509b53da61f755e0991ef539092f5e5b30ab
4
- data.tar.gz: 5183d09472fde46d526af247a02b262d1e7eb2c6
3
+ metadata.gz: b76e551255f6f521fd1f4523bc51b996b55f9e27
4
+ data.tar.gz: 0e90b31b02642200bc23010cd87486968c752492
5
5
  SHA512:
6
- metadata.gz: 2c6f7f8cc4251e0bfeb7777a7f6de57b392ce16df2af57632e8d1a59f3c2f879d02594040f46c6d26fdc385974ada848e84a9c5c8dbcfe9180c137b3bdccd93a
7
- data.tar.gz: bea0d8c2363080e866e2b12a8ed96467c9e61c894a6508efcc4aef2796ebe8444c04736c7f76bdc6908a5bb6e90dd92c23eca248fe6ee3064f53767df9ba6f10
6
+ metadata.gz: 93dd096b3c715506298bf007c9b68177182071d49da644d1519811a82af2e1649ff263ea3d5da0da7914b802f79ee0184a1565f86926fa7a5735966521002479
7
+ data.tar.gz: 2e37018a51524203ff8d9011627e9133b55b8fcd3c6fc24bff415ad844ee9bf1922042e5a01a288a67af5fae801fb4b12baec83a5515a6b947847a98610d8d44
data/README.md CHANGED
@@ -1,2 +1,55 @@
1
1
  # embulk-parser-flexml
2
- Flexible xml parser for embulk - supports xpath and attributes
2
+
3
+ Parser plugin for [Embulk](https://github.com/embulk/embulk).
4
+
5
+ Flexible XML parser for Embulk. Reads data using XPath expressions and from element attributes.
6
+
7
+ * **Plugin type**: parser
8
+ * **Load all or nothing**: yes
9
+ * **Resume supported**: no
10
+
11
+ ## Configuration
12
+
13
+ - **type**: specify this plugin as `flexml`.
14
+ - **root**: root path from which each entry is fetched, specified in *path/to/node* style (string, required)
15
+ - **schema**: specify the attributes (columns) of the table and their data types (required)
16
+ - **name**: name of the attribute (string, required)
17
+ - **type**: type of the attribute (string, required)
18
+ - **attribute**: if specified, the value of this attribute will be the output; otherwise the child will be the output (string, optional)
19
+ - **xpath**: child element to select, relative to each root entry; if omitted, the entry element itself is used (string, optional)
20
+ - **format**: timestamp format to parse, in `strptime` syntax (string, required when type is `timestamp`)
21
+ - **timezone**: timestamp will be parsed in this timezone (string, optional)
22
+
23
+
24
+ ## Example
25
+
26
+ ### Configuration
27
+
28
+ ```yaml
29
+ parser:
30
+ type: flexml
31
+ root: Team/Players/Player
32
+ schema:
33
+ - { name: name, type: string, attribute: name }
34
+ - { name: age, type: long, attribute: age }
35
+ - { name: about, type: string, xpath: About }
36
+ - { name: facebook, type: string, xpath: "SocialMedia[@type='facebook']", attribute: url }
37
+ - { name: twitter, type: string, xpath: "SocialMedia[@type='twitter']", attribute: url }
38
+ ```
39
+
40
+ ### XML
41
+
42
+ ```xml
43
+ <?xml version="1.0" encoding="utf-8" standalone="no"?>
44
+ <Team>
45
+ <Players>
46
+ <Player name="Locatelli" age="23">
47
+ <About>
48
+ Manuel Locatelli Cavaliere OMRI (born 8 January 1998) is an Italian professional footballer who plays as a midfielder for Serie A club Juventus, on loan from Serie A club Sassuolo, and the Italy national team.
49
+ </About>
50
+ <SocialMedia type="facebook" url="https://www.facebook.com/locamanuel73"/>
51
+ <SocialMedia type="twitter" url="https://twitter.com/locamanuel73"/>
52
+ </Player>
53
+ </Players>
54
+ </Team>
55
+ ```
@@ -3,7 +3,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
3
 
4
4
  Gem::Specification.new do |spec|
5
5
  spec.name = "embulk-parser-flexml"
6
- spec.version = "0.0.1"
6
+ spec.version = "0.2.0"
7
7
  spec.authors = ["Surya Asriadie"]
8
8
  spec.email = ["surya.asriadie@gmail.com"]
9
9
  spec.summary = %q{Flexible Embulk parser plugin for XML}
@@ -15,7 +15,7 @@ Gem::Specification.new do |spec|
15
15
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
16
16
  spec.require_paths = ["lib"]
17
17
 
18
- spec.add_dependency "rexml", "~> 3.1"
18
+ spec.add_dependency "rexml", "~> 3.1.9"
19
19
  spec.add_development_dependency "bundler", "~> 1.0"
20
20
  spec.add_development_dependency 'embulk', ['>= 0.9.15']
21
21
  spec.add_development_dependency "rake", "~> 10.0"
@@ -14,7 +14,7 @@ module Embulk
14
14
  if s["type"] == "timestamp"
15
15
  memo[s["name"]].merge!({
16
16
  "format" => s["format"],
17
- "timezone" => s["timezone"] || "+0900"
17
+ "timezone" => s["timezone"]
18
18
  })
19
19
  end
20
20
  memo
@@ -38,8 +38,6 @@ module Embulk
38
38
  values = @task[:schema].map do |f, c|
39
39
  row = if c.has_key?("xpath")
40
40
  XPath.first(e, c["xpath"])
41
- elsif c.has_key?("element")
42
- e.element(c["element"])
43
41
  else
44
42
  e
45
43
  end
@@ -55,7 +53,7 @@ module Embulk
55
53
  @page_builder.add(values)
56
54
  end
57
55
  rescue Exception => e
58
- puts "=>>>> ERROR: #{e}"
56
+ Embulk.logger.error "Failed to parse xml: #{e.message}"
59
57
  end
60
58
  end
61
59
 
@@ -76,6 +74,8 @@ module Embulk
76
74
  when "timestamp"
77
75
  unless v.empty?
78
76
  dest = Time.strptime(v, config["format"])
77
+ return dest.utc if config["timezone"].empty?
78
+
79
79
  utc_offset = dest.utc_offset
80
80
  zone_offset = Time.zone_offset(config["timezone"])
81
81
  dest.localtime(zone_offset) + utc_offset - zone_offset
metadata CHANGED
@@ -1,21 +1,21 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-parser-flexml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Surya Asriadie
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-11-15 00:00:00.000000000 Z
11
+ date: 2021-12-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
15
15
  requirements:
16
16
  - - "~>"
17
17
  - !ruby/object:Gem::Version
18
- version: '3.1'
18
+ version: 3.1.9
19
19
  name: rexml
20
20
  prerelease: false
21
21
  type: :runtime
@@ -23,7 +23,7 @@ dependencies:
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '3.1'
26
+ version: 3.1.9
27
27
  - !ruby/object:Gem::Dependency
28
28
  requirement: !ruby/object:Gem::Requirement
29
29
  requirements: