embulk-input-http 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: d8ea58d778613a1a60214badcea8ea098f9dab77
4
+ data.tar.gz: c5eb80607d79558874a009a2dc47f12696053c7f
5
+ SHA512:
6
+ metadata.gz: 98ab84c0d458317898947ded60b4edfd6b4c132b1aaea930a485d76b14e408b742863fac125172bb46d86d54e9e6b6ea6c3aa6229832054b0dbd3233361ffe90
7
+ data.tar.gz: 984d9997c12a42438546d8b4acf28053fd2d3c6b5620fde4cbbf07718a6cdce631a7e0ffac8b5156005a284856c884c6c28f1f66bc7d8b3e24fca4922093d4ff
data/.gitignore ADDED
@@ -0,0 +1,10 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.gem
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source "https://rubygems.org"
2
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Takuma kanari
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,117 @@
1
+ # Embulk::Input::Http
2
+
3
+ Input HTTP plugin for [Embulk](https://github.com/embulk/embulk).
4
+ Reads content via HTTP and parses/iterates over JSON (or XML) data.
5
+
6
+ ## Installation
7
+
8
+ Run this command with your embulk binary.
9
+
10
+ ```sh
11
+ $ embulk gem install embulk-input-http
12
+ ```
13
+
14
+ ## Usage
15
+
16
+ Specify in your config.yml file
17
+
18
+ ```yaml
19
+ in:
20
+ type: http
21
+ url: http://express.heartrails.com/api/json
22
+ params:
23
+ - {name: method, value: getStations}
24
+ - {name: x, value: 135.0}
25
+ - {name: y, value: 35.0}
26
+ schema:
27
+ - {name: name, type: string}
28
+ - {name: next, type: string}
29
+ - {name: prev, type: string}
30
+ - {name: distance, type: string}
31
+ - {name: x, type: double}
32
+ - {name: y, type: double}
33
+ - {name: line, type: string}
34
+ - {name: postal, type: string}
35
+ iterate: {type: json, path: $.response.station}
36
+ method: get
37
+ ```
38
+
39
+ - type: specify this plugin as `http`
40
+ - url: base URL of the HTTP endpoint, such as an API (required)
41
+ - schema: column names and data types of the resulting table (required)
42
+ - iterate: data type and path to find root data, json/xml is supported for now (required)
43
+ - method: http method, get is used by default (optional)
44
+ - params: pair of name/value to specify query parameter (optional)
45
+
46
+
47
+ ### Iterate data
48
+
49
+ The *iterate* section supports two data types for parsing the result returned by the HTTP API.
50
+
51
+
52
+ #### json
53
+
54
+ For this type, you need to specify *path* as [jsonpath](http://goessner.net/articles/JsonPath/).
55
+
56
+ for example:
57
+
58
+ ```json
59
+ {
60
+ "result" : "success",
61
+ "students" : [
62
+ { "name" : "John", "age" : 10 },
63
+ { "name" : "Paul", "age" : 16 },
64
+ { "name" : "George", "age" : 17 },
65
+ { "name" : "Ringo", "age" : 18 }
66
+ ]
67
+ }
68
+ ```
69
+
70
+ You can iterate over the "students" node with the following configuration:
71
+
72
+ iterate: {type: json, path: $.students}
73
+
74
+ #### xml
75
+
76
+ You can also parse XML by specifying a **path/to/node** style value for *path*.
77
+
78
+ for example:
79
+
80
+
81
+ ```xml
82
+ <data>
83
+ <result>true</result>
84
+ <students>
85
+ <student>
86
+ <name>John</name>
87
+ <age>10</age>
88
+ </student>
89
+ <student>
90
+ <name>Paul</name>
91
+ <age>16</age>
92
+ </student>
93
+ <student>
94
+ <name>George</name>
95
+ <age>17</age>
96
+ </student>
97
+ <student>
98
+ <name>Ringo</name>
99
+ <age>18</age>
100
+ </student>
101
+ </students>
+ </data>
102
+ ```
103
+
104
+ Configuration as below to iterate student node:
105
+
106
+ iterate: {type: xml, path: data/students/student}
107
+
108
+
109
+ ## TODO
110
+
111
+ - BasicAuth
112
+ - HTTP-proxy
113
+ - Brace-expansion style parameters, as in curl
114
+
115
+ ## Patch
116
+
117
+ Welcome!
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
@@ -0,0 +1,22 @@
1
+ # coding: utf-8
2
# Put the gem's own lib directory on the load path while the spec is evaluated.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

Gem::Specification.new do |gem|
  # Identity
  gem.name    = "embulk-input-http"
  gem.version = "0.0.2"
  gem.authors = ["Takuma kanari"]
  gem.email   = ["chemtrails.t@gmail.com"]

  # Description shown on the gem index
  gem.summary     = "Embulk plugin for http input"
  gem.description = "fetch data via http"
  gem.homepage    = "https://github.com/takumakanari/embulk-input-http"
  gem.license     = "MIT"

  # Package every file tracked by git; anything under bin/ becomes an executable.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  # Runtime dependency
  gem.add_dependency "jsonpath", "~> 0.5"

  # Development-only dependencies
  gem.add_development_dependency "bundler", "~> 1.0"
  gem.add_development_dependency "rake", "~> 0.9.2"
end
@@ -0,0 +1,21 @@
1
+ exec: {}
2
+ in:
3
+ type: http
4
+ url: http://express.heartrails.com/api/json
5
+ params:
6
+ - {name: method, value: getStations}
7
+ - {name: x, value: 135.0}
8
+ - {name: y, value: 35.0}
9
+ schema:
10
+ - {name: name, type: string}
11
+ - {name: next, type: string}
12
+ - {name: prev, type: string}
13
+ - {name: distance, type: string}
14
+ - {name: x, type: double}
15
+ - {name: y, type: double}
16
+ - {name: line, type: string}
17
+ - {name: postal, type: string}
18
+ method: get
19
+ iterate: {type: json, path: $.response.station}
20
+ out: {type: stdout}
21
+
@@ -0,0 +1,164 @@
1
+ require "net/http"
2
+ require "uri"
3
+
4
+ module Embulk
5
+ module Input
6
+
7
+ class HttpInputPlugin < InputPlugin
8
+ Plugin.register_input("http", self)
9
+
10
# Reads the plugin configuration, validates the iterate type, and yields the
# resulting task and column list to the caller's block with a task count of 1.
#
# Configuration keys read: "url", "schema" and "iterate" (no default), plus
# "method" (default "get"), "params" (default []), "open_timeout"
# (default 2.0) and "read_timeout" (default 10.0).
#
# @param config [#param, #merge] plugin configuration object
# @yield [task, columns, count] task hash, Column list and the number 1
# @return [Hash] always an empty hash
# @raise [RuntimeError] when iterate["type"] is neither "json" nor "xml"
def self.transaction(config, &control)
  endpoint      = config.param("url", :string)
  column_defs   = config.param("schema", :array)
  http_method   = config.param("method", :string, default: "get")
  query_params  = config.param("params", :array, default: [])
  iteration     = config.param("iterate", :hash)
  connect_limit = config.param("open_timeout", :float, default: 2.0)
  read_limit    = config.param("read_timeout", :float, default: 10.0)

  format = iteration["type"]
  raise "Unknown data_type #{format}, only supported for json or xml" unless %w[json xml].include?(format)

  # One Column per schema entry, indexed by its position in the schema.
  columns = column_defs.each_with_index.map { |col, idx| Column.new(idx, col["name"], col["type"].to_sym) }

  task = {
    :url => endpoint,
    :method => http_method,
    :params => query_params,
    :schema => column_defs,
    :iterate => iteration,
    :open_timeout => connect_limit,
    :read_timeout => read_limit
  }

  outcome = yield(task, columns, 1)
  # NOTE(review): assumes the block's result responds to ["done"] with a
  # nested list; confirm against the Embulk version in use.
  config.merge(outcome["done"].flatten.compact)
  {}
end
42
+
43
# Fetches the remote document, iterates over the records found at the
# configured path and adds one row per record to @page_builder.
#
# Reads "schema" and "iterate" from @task; the response body comes from #fetch.
#
# @return [Hash] {:rows => n}, where n is the number of records emitted
# @raise [RuntimeError] on an unsupported iterate type or column type
def run
  # Time.strptime is provided by the "time" standard library, not core Time.
  require "time"

  schema = @task["schema"]
  iterate = @task["iterate"]

  data = fetch.body
  data_type = iterate["type"]

  iter =
    case data_type
    when "json" then IterJson.new(data, iterate["path"])
    when "xml" then IterXML.new(data, iterate["path"])
    else raise "Unsupported data_type #{data_type}"
    end

  rows = 0
  iter.each do |record|
    rows += 1
    @page_builder.add(schema.map { |column| convert_value(record[column["name"]], column) })
  end
  @page_builder.finish

  {:rows => rows}
end

# Converts one raw field to the Ruby value for the column's declared type.
#
# A missing (nil) field is treated as "" before conversion, so it becomes
# "" for string, 0 for long, 0.0 for double, false for boolean and nil for
# timestamp.
#
# @param raw [Object, nil] value taken from the record
# @param column [Hash] schema entry with "type" and, for timestamps, "format"
# @return [Object, nil] converted value
# @raise [RuntimeError] when the column type is not supported
def convert_value(raw, column)
  val = raw.nil? ? "" : raw
  type = column["type"]

  case type
  when "string"
    val
  when "long"
    val.to_i
  when "double"
    val.to_f
  when "boolean"
    # JSON sources may deliver native true or 1, so compare the string form.
    %w[yes true 1].include?(val.to_s)
  when "timestamp"
    text = val.to_s
    text.empty? ? nil : Time.strptime(text, column["format"])
  else
    raise "Unsupported type #{type}"
  end
end
private :convert_value
86
+
87
+ private
88
+
89
# Performs the configured HTTP request and returns the response on success.
#
# Reads "url", "method", "params", "open_timeout" and "read_timeout" from
# @task. For GET the params are appended to the request target as a query
# string; for POST they are sent as the form-encoded request body.
#
# @return [Net::HTTPResponse] a response matching Net::HTTPSuccess
# @raise [RuntimeError] on a non-HTTP(S) url, an unsupported method, or a
#   response that is not a Net::HTTPSuccess
def fetch
  uri = URI.parse(@task["url"])
  raise "Unsupported url #{@task["url"]}" unless uri.is_a?(URI::HTTP)

  method = @task["method"]
  qs = URI.encode_www_form(@task["params"].map { |param| [param["name"], param["value"]] })
  puts "#{method.upcase} #{uri}?#{qs}"

  # Reject unknown methods before a connection is opened.
  verb = method.downcase
  raise "Unsupported method #{method}" unless %w[get post].include?(verb)

  # Timeouts and TLS must be configured before the connection is opened;
  # assigning open_timeout inside the start block would come too late.
  options = {
    :use_ssl => uri.scheme == "https",
    :open_timeout => @task["open_timeout"],
    :read_timeout => @task["read_timeout"]
  }

  res = Net::HTTP.start(uri.host, uri.port, options) do |client|
    # request_uri is "/" for a bare host and keeps any query string that is
    # already part of the configured url.
    target = uri.request_uri
    if verb == "post"
      client.post(target, qs)
    else
      target += (uri.query ? "&" : "?") + qs unless qs.empty?
      client.get(target)
    end
  end

  case res
  when Net::HTTPSuccess
    res
  else
    raise "Request is not successful, code=#{res.code}, value=#{res.body}"
  end
end
117
+
118
# Base class for record iterators: stores the fetched data and the path
# expression, and leaves the actual iteration to subclasses.
class Iter
  # @param data [Object] fetched document to iterate over
  # @param path [Object] path expression locating the records
  def initialize(data, path)
    @data = data
    @path = path
  end

  # Subclasses must override this to yield one record at a time.
  #
  # @raise [NotImplementedError] always
  def each
    # `NotImplementedError("each")` would be a call to an undefined method;
    # the exception class and message have to be passed to raise separately.
    raise NotImplementedError, "each"
  end
end
128
+
129
# Iterates over the XML elements matching the given path and yields each
# one as a hash of child element name => child element text.
class IterXML < Iter
  # Loads REXML on first use and parses the data into a document.
  def initialize(data, path)
    require "rexml/document"
    super(data, path)
    @doc = REXML::Document.new(@data)
  end

  # @yield [Hash] child element names mapped to their text
  def each
    @doc.elements.each(@path) do |node|
      fields = node.elements.each_with_object({}) do |child, acc|
        acc[child.name] = child.text
      end
      yield fields
    end
  end
end
146
+
147
# Iterates over the values selected by a JsonPath expression and yields
# each one, requiring every value to be a Hash.
class IterJson < Iter
  # Loads the jsonpath library on first use and compiles the path.
  def initialize(data, path)
    require "jsonpath"
    super(data, path)
    @jsonpath = JsonPath.new(@path)
  end

  # @yield [Hash] one matched record
  # @raise [RuntimeError] when a matched value is not exactly a Hash
  def each
    matches = @jsonpath.on(@data).flatten
    matches.each do |entry|
      if entry.instance_of?(Hash)
        yield entry
      else
        raise "data is must be hash, but #{entry.class}"
      end
    end
  end
end
161
+
162
+ end
163
+ end
164
+ end
metadata ADDED
@@ -0,0 +1,94 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-input-http
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Takuma kanari
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-03-08 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: jsonpath
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.5'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.5'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 0.9.2
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 0.9.2
55
+ description: fetch data via http
56
+ email:
57
+ - chemtrails.t@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - Gemfile
64
+ - LICENSE.txt
65
+ - README.md
66
+ - Rakefile
67
+ - embulk-input-http.gemspec
68
+ - example/json-example.yml
69
+ - lib/embulk/input/http.rb
70
+ homepage: https://github.com/takumakanari/embulk-input-http
71
+ licenses:
72
+ - MIT
73
+ metadata: {}
74
+ post_install_message:
75
+ rdoc_options: []
76
+ require_paths:
77
+ - lib
78
+ required_ruby_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ required_rubygems_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ requirements: []
89
+ rubyforge_project:
90
+ rubygems_version: 2.2.2
91
+ signing_key:
92
+ specification_version: 4
93
+ summary: Embulk plugin for http input
94
+ test_files: []