embulk-input-http 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: d8ea58d778613a1a60214badcea8ea098f9dab77
4
+ data.tar.gz: c5eb80607d79558874a009a2dc47f12696053c7f
5
+ SHA512:
6
+ metadata.gz: 98ab84c0d458317898947ded60b4edfd6b4c132b1aaea930a485d76b14e408b742863fac125172bb46d86d54e9e6b6ea6c3aa6229832054b0dbd3233361ffe90
7
+ data.tar.gz: 984d9997c12a42438546d8b4acf28053fd2d3c6b5620fde4cbbf07718a6cdce631a7e0ffac8b5156005a284856c884c6c28f1f66bc7d8b3e24fca4922093d4ff
data/.gitignore ADDED
@@ -0,0 +1,10 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.gem
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source "https://rubygems.org"
2
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Takuma kanari
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,117 @@
1
+ # Embulk::Input::Http
2
+
3
+ Input HTTP plugin for [Embulk](https://github.com/embulk/embulk).
4
+ Reads content via HTTP and parses/iterates over JSON (or XML) data.
5
+
6
+ ## Installation
7
+
8
+ Run this command with your embulk binary.
9
+
10
+ ```sh
11
+ $ embulk gem install embulk-input-http
12
+ ```
13
+
14
+ ## Usage
15
+
16
+ Specify in your config.yml file
17
+
18
+ ```yaml
19
+ in:
20
+ type: http
21
+ url: http://express.heartrails.com/api/json
22
+ params:
23
+ - {name: method, value: getStations}
24
+ - {name: x, value: 135.0}
25
+ - {name: y, value: 35.0}
26
+ schema:
27
+ - {name: name, type: string}
28
+ - {name: next, type: string}
29
+ - {name: prev, type: string}
30
+ - {name: distance, type: string}
31
+ - {name: x, type: double}
32
+ - {name: y, type: double}
33
+ - {name: line, type: string}
34
+ - {name: postal, type: string}
35
+ iterate: {type: json, path: $.response.station}
36
+ method: get
37
+ ```
38
+
39
+ - type: specify this plugin as `http`
40
+ - url: base URL of the HTTP endpoint, such as an API (required)
41
+ - schema: column names and data types of the resulting table (required)
42
+ - iterate: data type and path to find root data, json/xml is supported for now (required)
43
+ - method: http method, get is used by default (optional)
44
+ - params: list of name/value pairs sent as request parameters (optional)
45
+
46
+
47
+ ### Iterate data
48
+
49
+ The *iterate* section supports two types for parsing the result of an HTTP API.
50
+
51
+
52
+ #### json
53
+
54
+ For this type, you need to specify *path* as [jsonpath](http://goessner.net/articles/JsonPath/).
55
+
56
+ for example:
57
+
58
+ ```json
59
+ {
60
+ "result" : "success",
61
+ "students" : [
62
+ { "name" : "John", "age" : 10 },
63
+ { "name" : "Paul", "age" : 16 },
64
+ { "name" : "George", "age" : 17 },
65
+ { "name" : "Ringo", "age" : 18 }
66
+ ]
67
+ }
68
+ ```
69
+
70
+ You can iterate over the "students" node with the following configuration:
71
+
72
+ iterate: {type: json, path: $.students}
73
+
74
+ #### xml
75
+
76
+ You can also parse XML by specifying a **path/to/node** style value for *path*.
77
+
78
+ for example:
79
+
80
+
81
+ ```xml
82
+ <data>
83
+ <result>true</result>
84
+ <students>
85
+ <student>
86
+ <name>John</name>
87
+ <age>10</age>
88
+ </student>
89
+ <student>
90
+ <name>Paul</name>
91
+ <age>16</age>
92
+ </student>
93
+ <student>
94
+ <name>George</name>
95
+ <age>17</age>
96
+ </student>
97
+ <student>
98
+ <name>Ringo</name>
99
+ <age>18</age>
100
+ </student>
101
+ </students></data>
102
+ ```
103
+
104
+ Configuration as below to iterate student node:
105
+
106
+ iterate: {type: xml, path: data/students/student}
107
+
108
+
109
+ ## TODO
110
+
111
+ - BasicAuth
112
+ - HTTP-proxy
113
+ - Brace-expansion style parameters, as in curl
114
+
115
+ ## Patch
116
+
117
+ Welcome!
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
@@ -0,0 +1,22 @@
1
# coding: utf-8
# Gem specification for the embulk-input-http plugin.

# Put ./lib on the load path while this specification is evaluated.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

Gem::Specification.new do |gem|
  # Identity and authorship.
  gem.name        = "embulk-input-http"
  gem.version     = "0.0.2"
  gem.authors     = ["Takuma kanari"]
  gem.email       = ["chemtrails.t@gmail.com"]
  gem.summary     = "Embulk plugin for http input"
  gem.description = "fetch data via http"
  gem.homepage    = "https://github.com/takumakanari/embulk-input-http"
  gem.license     = "MIT"

  # Package every file tracked by git; anything under bin/ is an executable.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  # Runtime dependency first, then development-only tooling.
  gem.add_dependency "jsonpath", "~> 0.5"
  gem.add_development_dependency "bundler", "~> 1.0"
  gem.add_development_dependency "rake", "~> 0.9.2"
end
@@ -0,0 +1,21 @@
1
+ exec: {}
2
+ in:
3
+ type: http
4
+ url: http://express.heartrails.com/api/json
5
+ params:
6
+ - {name: method, value: getStations}
7
+ - {name: x, value: 135.0}
8
+ - {name: y, value: 35.0}
9
+ schema:
10
+ - {name: name, type: string}
11
+ - {name: next, type: string}
12
+ - {name: prev, type: string}
13
+ - {name: distance, type: string}
14
+ - {name: x, type: double}
15
+ - {name: y, type: double}
16
+ - {name: line, type: string}
17
+ - {name: postal, type: string}
18
+ method: get
19
+ iterate: {type: json, path: $.response.station}
20
+ out: {type: stdout}
21
+
@@ -0,0 +1,164 @@
1
+ require "net/http"
2
+ require "uri"
3
+
4
+ module Embulk
5
+ module Input
6
+
7
+ class HttpInputPlugin < InputPlugin
8
+ Plugin.register_input("http", self)
9
+
10
+ def self.transaction(config, &control)
11
+ url = config.param("url", :string)
12
+ schema = config.param("schema", :array)
13
+ method = config.param("method", :string, default: "get")
14
+ params = config.param("params", :array, default: [])
15
+ iterate = config.param("iterate", :hash)
16
+ open_timeout = config.param("open_timeout", :float, default: 2.0)
17
+ read_timeout = config.param("read_timeout", :float, default: 10.0)
18
+
19
+ data_type = iterate["type"]
20
+ unless ["json", "xml"].include?(data_type)
21
+ raise "Unknown data_type #{data_type}, only supported for json or xml"
22
+ end
23
+
24
+ columns = schema.each_with_index.map do |c, i|
25
+ Column.new(i, c["name"], c["type"].to_sym)
26
+ end
27
+
28
+ task = {
29
+ :url => url,
30
+ :method => method,
31
+ :params => params,
32
+ :schema => schema,
33
+ :iterate => iterate,
34
+ :open_timeout => open_timeout,
35
+ :read_timeout => read_timeout
36
+ }
37
+
38
+ report = yield(task, columns, 1)
39
+ config.merge(report["done"].flatten.compact)
40
+ {}
41
+ end
42
+
43
+ def run
44
+ schema = @task["schema"]
45
+ iterate = @task["iterate"]
46
+
47
+ data = fetch.body
48
+ data_type = iterate["type"]
49
+
50
+ case data_type
51
+ when "json"
52
+ iter = IterJson.new(data, iterate["path"])
53
+ when "xml"
54
+ iter = IterXML.new(data, iterate["path"])
55
+ else
56
+ raise "Unsupported data_type #{data_type}"
57
+ end
58
+
59
+ rows = 0
60
+ iter.each do |e|
61
+ rows += 1
62
+ @page_builder.add(schema.map{|c|
63
+ name = c["name"]
64
+ type = c["type"]
65
+ val = e[name].nil? ? "" : e[name]
66
+ case type
67
+ when "string"
68
+ val
69
+ when "long"
70
+ val.to_i
71
+ when "double"
72
+ val.to_f
73
+ when "boolean"
74
+ ["yes", "true", "1"].include?(val)
75
+ when "timestamp"
76
+ (val.nil? || val.empty?) ? nil : Time.strptime(val, c["format"])
77
+ else
78
+ raise "Unsupported type #{type}"
79
+ end
80
+ })
81
+ end
82
+ @page_builder.finish
83
+
84
+ {:rows => rows}
85
+ end
86
+
87
+ private
88
+
89
+ def fetch
90
+ uri = URI.parse(@task["url"])
91
+ method = @task["method"]
92
+ qs = URI.encode_www_form(@task["params"].map {|p|
93
+ [p["name"], p["value"]]
94
+ })
95
+ puts "#{method.upcase} #{uri}?#{qs}"
96
+
97
+ res = Net::HTTP.start(uri.host, uri.port) do |client|
98
+ client.open_timeout = @task["open_timeout"]
99
+ client.read_timeout = @task["read_timeout"]
100
+ case method.downcase
101
+ when "get"
102
+ client.get([uri.path, qs].join("?"))
103
+ when "post"
104
+ client.post(uri.path, qs)
105
+ else
106
+ raise "Unsupported method #{method}"
107
+ end
108
+ end
109
+
110
+ case res
111
+ when Net::HTTPSuccess
112
+ res
113
+ else
114
+ raise "Request is not successful, code=#{res.code}, value=#{res.body}"
115
+ end
116
+ end
117
+
118
+ class Iter
119
+ def initialize(data, path)
120
+ @data = data
121
+ @path = path
122
+ end
123
+
124
+ def each
125
+ raise NotImplementedError("each")
126
+ end
127
+ end
128
+
129
+ class IterXML < Iter
130
+ def initialize(data, path)
131
+ require "rexml/document"
132
+ super
133
+ @doc = REXML::Document.new(@data)
134
+ end
135
+
136
+ def each
137
+ @doc.elements.each(@path) do |e|
138
+ ret = {}
139
+ e.elements.each do |d|
140
+ ret[d.name] = d.text
141
+ end
142
+ yield ret
143
+ end
144
+ end
145
+ end
146
+
147
+ class IterJson < Iter
148
+ def initialize(data, path)
149
+ require "jsonpath"
150
+ super
151
+ @jsonpath = JsonPath.new(@path)
152
+ end
153
+
154
+ def each
155
+ @jsonpath.on(@data).flatten.each do |e|
156
+ raise "data is must be hash, but #{e.class}" unless e.instance_of?(Hash)
157
+ yield e
158
+ end
159
+ end
160
+ end
161
+
162
+ end
163
+ end
164
+ end
metadata ADDED
@@ -0,0 +1,94 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-input-http
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Takuma kanari
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-03-08 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: jsonpath
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.5'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.5'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 0.9.2
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 0.9.2
55
+ description: fetch data via http
56
+ email:
57
+ - chemtrails.t@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - Gemfile
64
+ - LICENSE.txt
65
+ - README.md
66
+ - Rakefile
67
+ - embulk-input-http.gemspec
68
+ - example/json-example.yml
69
+ - lib/embulk/input/http.rb
70
+ homepage: https://github.com/takumakanari/embulk-input-http
71
+ licenses:
72
+ - MIT
73
+ metadata: {}
74
+ post_install_message:
75
+ rdoc_options: []
76
+ require_paths:
77
+ - lib
78
+ required_ruby_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ required_rubygems_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ requirements: []
89
+ rubyforge_project:
90
+ rubygems_version: 2.2.2
91
+ signing_key:
92
+ specification_version: 4
93
+ summary: Embulk plugin for http input
94
+ test_files: []