mushikago-sdk 2.4.2 → 2.4.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +184 -2
- data/lib/mushikago/configuration.rb +1 -0
- data/lib/mushikago/http/request.rb +3 -2
- data/lib/mushikago/mitsubachi/client.rb +2 -0
- data/lib/mushikago/mitsubachi/http_fetch_request.rb +2 -0
- data/lib/mushikago/mitsubachi/http_push_request.rb +2 -0
- data/lib/mushikago/version.rb +1 -1
- data/spec/mushikago/mitsubachi/http_fetch_request_spec.rb +8 -0
- data/spec/mushikago/mitsubachi/http_push_request_spec.rb +8 -0
- metadata +20 -20
data/README.md
CHANGED
@@ -5,8 +5,8 @@ Mushikago SDK for Ruby.
|
|
5
5
|
- **Author**: MiningBrownie
|
6
6
|
- **Copyright**: 2011
|
7
7
|
- **License**: Apache License, Version 2.0
|
8
|
-
- **Latest Version**: 2.4.
|
9
|
-
- **Release Date**:
|
8
|
+
- **Latest Version**: 2.4.3
|
9
|
+
- **Release Date**: Oct 25th 2012
|
10
10
|
|
11
11
|
|
12
12
|
概要
|
@@ -115,6 +115,184 @@ Mushikago SDK for Rubyはgemを使ってインストールします。
|
|
115
115
|
# {"word"=>"very", "score"=>4.4986811569504646},
|
116
116
|
# {"word"=>"is", "score"=>3.3137419313374643}]
|
117
117
|
|
118
|
+
### hanamgriを利用する
|
119
|
+
|
120
|
+
以下のコードで[hanamgri](http://www.mushikago.org/hanamgri/)を利用することができます。
|
121
|
+
|
122
|
+
# -*- encoding: utf-8 -*-
|
123
|
+
require 'rubygems' # ruby 1.9系では不要
|
124
|
+
require 'mushikago'
|
125
|
+
|
126
|
+
client = Mushikago::Hanamgri::Client.new(:api_key => '<APIキー>', :secret_key => '<シークレットキー>')
|
127
|
+
|
128
|
+
# domain/create - ドメインを作成する
|
129
|
+
domain_name = "ec_products" # ドメイン名
|
130
|
+
seeds = "税込,送料" # キーワード
|
131
|
+
option = Hash.new
|
132
|
+
option[:description] = "ecサイト取得用のドメイン" # ドメインの説明文
|
133
|
+
option[:dictionary_name] = "mushikago/ecsite-20120530" # 使用したい辞書名
|
134
|
+
|
135
|
+
schema = Mushikago::Hanamgri::Schema.new do
|
136
|
+
add Mushikago::Hanamgri::Field.new('name', true, :string, "mushikago/ecsite/title-20120530")
|
137
|
+
add Mushikago::Hanamgri::Field.new('price', true, :number, "mushikago/ecsite/price-20120530")
|
138
|
+
end
|
139
|
+
|
140
|
+
name_knowledge = "mushikago/ecsite/title-20120530" # 使用したい学習データ名
|
141
|
+
price_knowledge = "mushikago/ecsite/price-20120530" # 使用したい学習データ名
|
142
|
+
|
143
|
+
schema = Mushikago::Hanamgri::Schema.new # 抽出データ定義
|
144
|
+
schema.add Mushikago::Hanamgri::Field.new('name', true, :string, name_knowledge) # 商品名を抽出するための定義
|
145
|
+
schema.add Mushikago::Hanamgri::Field.new('price', true, :number, price_knowledge) # 価格を抽出するための定義
|
146
|
+
schema.add Mushikago::Hanamgri::Field.new('detailed_features', false, :string) # 商品詳細を抽出するための定義
|
147
|
+
|
148
|
+
client.create_domain domain_name, seeds, schema, option
|
149
|
+
|
150
|
+
# domain/list - ドメインの一覧を取得する
|
151
|
+
option = Hash.new
|
152
|
+
option[:limit] = 3
|
153
|
+
option[:offset] = 3
|
154
|
+
option[:filter] = 'ec'
|
155
|
+
|
156
|
+
response = client.list_domains option
|
157
|
+
|
158
|
+
response['domains'].each do |domain|
|
159
|
+
puts domain['domain_name']
|
160
|
+
puts domain['description']
|
161
|
+
puts domain['updated_at']
|
162
|
+
end
|
163
|
+
puts response['total']
|
164
|
+
|
165
|
+
# domain/info - ドメインの情報を取得する
|
166
|
+
response = client.get_information domain_name
|
167
|
+
|
168
|
+
puts response['domain_name']
|
169
|
+
puts response['description']
|
170
|
+
puts response['created_at']
|
171
|
+
puts response['updated_at']
|
172
|
+
puts response['seeds']
|
173
|
+
puts response['schema']
|
174
|
+
|
175
|
+
# domain/update - ドメインの説明文を更新する
|
176
|
+
description = "このドメインは、家電ECサイト用のドメインです。" # ドメインの説明文
|
177
|
+
client.update_domain domain_name, description
|
178
|
+
|
179
|
+
# domain/get_queue_size - ドメインの未完了のタスク数を取得する
|
180
|
+
response = client.get_queue_size domain_name
|
181
|
+
|
182
|
+
puts response['queue_size']
|
183
|
+
|
184
|
+
# domain/train - キーワード学習と構造学習(教師付き学習)をさせる
|
185
|
+
domain_name = "ec_products" # ドメイン名
|
186
|
+
url = "http://ec.electronics.dummy/about" # urlもしくはhtml形式のテキスト
|
187
|
+
# 上記のurlはダミーです
|
188
|
+
|
189
|
+
# 構造学習のための教師データ
|
190
|
+
training_data = Mushikago::Hanamgri::TrainingData.new do
|
191
|
+
put("name", "サイクロンクリーナー")
|
192
|
+
put("price", "11,635円")
|
193
|
+
put('detailed_features', '遠心力でごみを分離するすごい掃除機')
|
194
|
+
end
|
195
|
+
|
196
|
+
client.train domain_name, url, training_data
|
197
|
+
|
198
|
+
# analysis/request - 解析を実行する
|
199
|
+
domain_name = "ec_products" # ドメイン名
|
200
|
+
url = "http://shop.example.com/" # 解析対象URL
|
201
|
+
# 上記のurlはダミーです
|
202
|
+
option[:tag] = "サイト1" # 解析結果を識別するためのタグ
|
203
|
+
|
204
|
+
response = client.request_analysis domain_name, url, option
|
205
|
+
|
206
|
+
puts response['request_id']
|
207
|
+
|
208
|
+
# analysis/list - 解析結果の一覧を取得する
|
209
|
+
domain_name = "ec_products" # ドメイン名
|
210
|
+
|
211
|
+
option = Hash.new
|
212
|
+
option[:limit] = 3 # 最大取得件数
|
213
|
+
option[:offset] = 3 # 開始位置
|
214
|
+
option[:filter] = 'サイト1' # 検索文字(tagに対して先頭一致)
|
215
|
+
option[:status] = 'complete' # 解析の状態
|
216
|
+
|
217
|
+
response = client.list_analyses domain_name, option
|
218
|
+
|
219
|
+
response['analyses'].each do |analysis|
|
220
|
+
puts analysis['request_id'] # 解析結果のリクエストID
|
221
|
+
puts analysis['save_url'] # 解析結果(blocks)の保存先url
|
222
|
+
puts analysis['status'] # 現在の解析の状態
|
223
|
+
puts analysis['updated_at'] # 解析情報の更新日
|
224
|
+
puts analysis['tag'] # 解析結果に設定されているtag
|
225
|
+
end
|
226
|
+
puts response['total'] # 解析情報の合計数が返ってきます
|
227
|
+
|
228
|
+
# analysis/get - 解析結果を取得する
|
229
|
+
domain_name = "ec_products" # ドメイン名
|
230
|
+
request_id = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" # リクエストID
|
231
|
+
|
232
|
+
response = client.get_analysis domain_name, request_id
|
233
|
+
|
234
|
+
puts response['analysis_data']['blocks'] # 解析により抽出されたHTMLの配列
|
235
|
+
puts response['analysis_data']['results'] # 解析によりblocksから抽出された結果の配列
|
236
|
+
|
237
|
+
# dictionary/save - ブロック抽出用辞書を保存する
|
238
|
+
option = Hash.new
|
239
|
+
option["description"] = "家電ecサイト用" # 辞書の説明
|
240
|
+
|
241
|
+
client.save_dictionary domain_name, option
|
242
|
+
|
243
|
+
# dictionary/list - ブロック抽出用辞書の一覧を取得する
|
244
|
+
option = Hash.new
|
245
|
+
option[:limit] = 3 # 最大取得件数
|
246
|
+
option[:offset] = 3 # 開始位置
|
247
|
+
|
248
|
+
response = client.list_dictionaries option
|
249
|
+
|
250
|
+
# ブロック抽出用辞書の情報が入った配列
|
251
|
+
response['dictionaries'].each do |dictionary|
|
252
|
+
puts dictionary['dictionary_name'] # 辞書名
|
253
|
+
puts dictionary['description'] # 辞書の説明
|
254
|
+
puts dictionary['created_at'] # 辞書の作成日
|
255
|
+
end
|
256
|
+
puts response['total'] # 辞書の合計数が返ってきます
|
257
|
+
|
258
|
+
# dictionary/delete - 保存したブロック抽出用辞書を削除する
|
259
|
+
dictionary_name = "xxxxx-xxxxxxxxx-xxxx-xxxx-xxxxx-xxxxxxxxxxxx" # 辞書名
|
260
|
+
|
261
|
+
client.delete_dictionary dictionary_name
|
262
|
+
|
263
|
+
# knowledge/save - パラメータ抽出のための学習データを保存する
|
264
|
+
field_name = "name" # フィールド名
|
265
|
+
option = Hash.new
|
266
|
+
option[:description] = "家電用ecサイトの商品名" # 保存する学習データの説明文
|
267
|
+
|
268
|
+
client.save_knowledge domain_name, field_name, option
|
269
|
+
|
270
|
+
# knowledge/list - パラメータ抽出のための学習データの一覧を取得する
|
271
|
+
option = Hash.new
|
272
|
+
option[:limit] = 3 # 最大取得件数
|
273
|
+
option[:offset] = 3 # 開始位置
|
274
|
+
option[:status] = 'complete' # 学習データ保存の進捗状況
|
275
|
+
|
276
|
+
response = client.list_knowledges option
|
277
|
+
|
278
|
+
# 保存した学習データの情報が入った配列
|
279
|
+
response['knowledges'].each do |knowledge|
|
280
|
+
puts knowledge['knowledge_name'] # 学習データ名
|
281
|
+
puts knowledge['description'] # 学習データの説明文
|
282
|
+
puts knowledge['status'] # 学習データ保存の進捗状況
|
283
|
+
puts knowledge['created_at'] # 学習データの作成日
|
284
|
+
end
|
285
|
+
puts response['total'] # ヒットした学習データの総数
|
286
|
+
|
287
|
+
# knowledge/delete - 保存したパラメータ抽出のための学習データを削除する
|
288
|
+
knowledge_name = "xxxxxxxxx/xxxxx/xxxxx-xxxxxxxxx-xxxxx-xxxx-xxxx-xxxxxxxxxxxx" # 学習データ名
|
289
|
+
|
290
|
+
client.delete_knowledge knowledge_name
|
291
|
+
|
292
|
+
# domain/delete - 作成したドメインを削除する
|
293
|
+
domain_name = "ec_products" # ドメイン名
|
294
|
+
|
295
|
+
client.delete_domain domain_name
|
118
296
|
|
119
297
|
#### APIキーとシークレットキーの設定方法
|
120
298
|
|
@@ -156,6 +334,10 @@ $ export MUSHIKAGO_SECRET_KEY=<シークレットキー>
|
|
156
334
|
変更履歴
|
157
335
|
--------
|
158
336
|
|
337
|
+
- **Oct.25.12**: 2.4.3 release
|
338
|
+
- http_fetch/pushに:encodeオプション追加
|
339
|
+
- **Sep.14.12**: 2.4.2 release
|
340
|
+
- mitsubachiのproject_infoのオプション変更に対応
|
159
341
|
- **Aug.27.12**: 2.4.1 release
|
160
342
|
- tomboのcapturesにstateがない件を修正
|
161
343
|
- **Aug.20.12**: 2.4 release
|
@@ -39,9 +39,10 @@ module Mushikago
|
|
39
39
|
param ? param[1] : nil
|
40
40
|
end
|
41
41
|
|
42
|
+
|
42
43
|
# @return [String] URLエンコードされ、&で接続されたパラメータの文字列
|
43
44
|
def url_encoded_params
|
44
|
-
params.sort.select{|p| p[1].kind_of?(String)}.collect{|pp| pp.map{|p|
|
45
|
+
params.sort.select{|p| p[1].kind_of?(String)}.collect{|pp| pp.map{|p| encoding p}.join('=')}.join('&')
|
45
46
|
end
|
46
47
|
|
47
48
|
# HTTPリクエストオブジェクトに変換する
|
@@ -67,7 +68,7 @@ module Mushikago
|
|
67
68
|
# @param [String] s URLエンコード対象の文字列
|
68
69
|
# @return [String] URLエンコードされた文字列
|
69
70
|
private
|
70
|
-
def
|
71
|
+
def encoding s
|
71
72
|
CGI.escape(s).gsub('+', '%20')
|
72
73
|
end
|
73
74
|
|
@@ -157,6 +157,7 @@ module Mushikago
|
|
157
157
|
# @option options [String] :group_id
|
158
158
|
# @option options [String] :unique_key
|
159
159
|
# @option options [Integer] :unique_key_expires
|
160
|
+
# @option options [String] :encode
|
160
161
|
# @example
|
161
162
|
# client.http_fetch('project01', 'http://www.tombo.ne.jp/', 'sample.rb', :follow_redirect => true)
|
162
163
|
# @return [Mushikago::Http::Response] リクエストの結果
|
@@ -182,6 +183,7 @@ module Mushikago
|
|
182
183
|
# @option options [String] :group_id
|
183
184
|
# @option options [String] :unique_key
|
184
185
|
# @option options [Integer] :unique_key_expires
|
186
|
+
# @option options [String] :encode
|
185
187
|
# @example
|
186
188
|
# client.http_push('project01', 'http://www.tombo.ne.jp/', 'sample.rb')
|
187
189
|
# @return [Mushikago::Http::Response] リクエストの結果
|
@@ -16,6 +16,7 @@ module Mushikago
|
|
16
16
|
request_parameter :group_id
|
17
17
|
request_parameter :unique_key
|
18
18
|
request_parameter :unique_key_expires do |v| v.to_s end
|
19
|
+
request_parameter :encode
|
19
20
|
|
20
21
|
def initialize project_name, url, script_name, options={}
|
21
22
|
super(options)
|
@@ -32,6 +33,7 @@ module Mushikago
|
|
32
33
|
self.group_id = options[:group_id] if options.has_key?(:group_id)
|
33
34
|
self.unique_key = options[:unique_key] if options.has_key?(:unique_key)
|
34
35
|
self.unique_key_expires = options[:unique_key_expires] if options.has_key?(:unique_key_expires)
|
36
|
+
self.encode = options[:encode] if options.has_key?(:encode)
|
35
37
|
end
|
36
38
|
end
|
37
39
|
end
|
@@ -17,6 +17,7 @@ module Mushikago
|
|
17
17
|
request_parameter :group_id
|
18
18
|
request_parameter :unique_key
|
19
19
|
request_parameter :unique_key_expires do |v| v.to_s end
|
20
|
+
request_parameter :encode
|
20
21
|
|
21
22
|
def initialize project_name, url, script_name, file_name, file_input_key, options={}
|
22
23
|
super(options)
|
@@ -34,6 +35,7 @@ module Mushikago
|
|
34
35
|
self.group_id = options[:group_id] if options.has_key?(:group_id)
|
35
36
|
self.unique_key = options[:unique_key] if options.has_key?(:unique_key)
|
36
37
|
self.unique_key_expires = options[:unique_key_expires] if options.has_key?(:unique_key_expires)
|
38
|
+
self.encode = options[:encode] if options.has_key?(:encode)
|
37
39
|
end
|
38
40
|
end
|
39
41
|
end
|
data/lib/mushikago/version.rb
CHANGED
@@ -11,4 +11,12 @@ describe Mushikago::Mitsubachi::HttpFetchRequest do
|
|
11
11
|
request.cookiejar.should == '[{"name":"name","value":"value","domain":"domain","path":"path","secure":true}]'
|
12
12
|
end
|
13
13
|
end
|
14
|
+
|
15
|
+
context 'encodeのテスト' do
|
16
|
+
it 'requestクラスにencodeのパラメータが指定されていること' do
|
17
|
+
encode = 'sjis'
|
18
|
+
request = Mushikago::Mitsubachi::HttpFetchRequest.new('p','u','s',:encode => encode)
|
19
|
+
request.encode.should == encode
|
20
|
+
end
|
21
|
+
end
|
14
22
|
end
|
@@ -11,5 +11,13 @@ describe Mushikago::Mitsubachi::HttpPushRequest do
|
|
11
11
|
request.cookiejar.should == '[{"name":"name","value":"value","domain":"domain","path":"path","secure":true}]'
|
12
12
|
end
|
13
13
|
end
|
14
|
+
|
15
|
+
context 'encodeのテスト' do
|
16
|
+
it 'requestクラスにencodeのパラメータが指定されていること' do
|
17
|
+
encode = 'sjis'
|
18
|
+
request = Mushikago::Mitsubachi::HttpFetchRequest.new('p','u','s',:encode => encode)
|
19
|
+
request.encode.should == encode
|
20
|
+
end
|
21
|
+
end
|
14
22
|
end
|
15
23
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mushikago-sdk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.4.
|
4
|
+
version: 2.4.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-10-25 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: json
|
16
|
-
requirement: &
|
16
|
+
requirement: &2154699000 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2154699000
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: mime-types
|
27
|
-
requirement: &
|
27
|
+
requirement: &2154698560 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *2154698560
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: rake
|
38
|
-
requirement: &
|
38
|
+
requirement: &2154698140 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *2154698140
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: maruku
|
49
|
-
requirement: &
|
49
|
+
requirement: &2154697720 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *2154697720
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: yard
|
60
|
-
requirement: &
|
60
|
+
requirement: &2154697300 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: '0'
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *2154697300
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: rspec
|
71
|
-
requirement: &
|
71
|
+
requirement: &2154696800 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ~>
|
@@ -76,10 +76,10 @@ dependencies:
|
|
76
76
|
version: 2.6.0
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *2154696800
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
81
|
name: ci_reporter
|
82
|
-
requirement: &
|
82
|
+
requirement: &2154696380 !ruby/object:Gem::Requirement
|
83
83
|
none: false
|
84
84
|
requirements:
|
85
85
|
- - ! '>='
|
@@ -87,10 +87,10 @@ dependencies:
|
|
87
87
|
version: '0'
|
88
88
|
type: :development
|
89
89
|
prerelease: false
|
90
|
-
version_requirements: *
|
90
|
+
version_requirements: *2154696380
|
91
91
|
- !ruby/object:Gem::Dependency
|
92
92
|
name: ZenTest
|
93
|
-
requirement: &
|
93
|
+
requirement: &2154695920 !ruby/object:Gem::Requirement
|
94
94
|
none: false
|
95
95
|
requirements:
|
96
96
|
- - ! '>='
|
@@ -98,10 +98,10 @@ dependencies:
|
|
98
98
|
version: '0'
|
99
99
|
type: :development
|
100
100
|
prerelease: false
|
101
|
-
version_requirements: *
|
101
|
+
version_requirements: *2154695920
|
102
102
|
- !ruby/object:Gem::Dependency
|
103
103
|
name: bundler
|
104
|
-
requirement: &
|
104
|
+
requirement: &2154695500 !ruby/object:Gem::Requirement
|
105
105
|
none: false
|
106
106
|
requirements:
|
107
107
|
- - ! '>='
|
@@ -109,7 +109,7 @@ dependencies:
|
|
109
109
|
version: '0'
|
110
110
|
type: :development
|
111
111
|
prerelease: false
|
112
|
-
version_requirements: *
|
112
|
+
version_requirements: *2154695500
|
113
113
|
description: A SDK for Mushikago Web Service.
|
114
114
|
email:
|
115
115
|
- t.matsuoka@miningbrownie.co.jp
|