mushikago-sdk 2.4.2 → 2.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +184 -2
- data/lib/mushikago/configuration.rb +1 -0
- data/lib/mushikago/http/request.rb +3 -2
- data/lib/mushikago/mitsubachi/client.rb +2 -0
- data/lib/mushikago/mitsubachi/http_fetch_request.rb +2 -0
- data/lib/mushikago/mitsubachi/http_push_request.rb +2 -0
- data/lib/mushikago/version.rb +1 -1
- data/spec/mushikago/mitsubachi/http_fetch_request_spec.rb +8 -0
- data/spec/mushikago/mitsubachi/http_push_request_spec.rb +8 -0
- metadata +20 -20
data/README.md
CHANGED
@@ -5,8 +5,8 @@ Mushikago SDK for Ruby.
|
|
5
5
|
- **Author**: MiningBrownie
|
6
6
|
- **Copyright**: 2011
|
7
7
|
- **License**: Apache License, Version 2.0
|
8
|
-
- **Latest Version**: 2.4.
|
9
|
-
- **Release Date**:
|
8
|
+
- **Latest Version**: 2.4.3
|
9
|
+
- **Release Date**: Oct 25th 2012
|
10
10
|
|
11
11
|
|
12
12
|
概要
|
@@ -115,6 +115,184 @@ Mushikago SDK for Rubyはgemを使ってインストールします。
|
|
115
115
|
# {"word"=>"very", "score"=>4.4986811569504646},
|
116
116
|
# {"word"=>"is", "score"=>3.3137419313374643}]
|
117
117
|
|
118
|
+
### hanamgriを利用する
|
119
|
+
|
120
|
+
以下のコードで[hanamgri](http://www.mushikago.org/hanamgri/)を利用することができます。
|
121
|
+
|
122
|
+
# -*- encoding: utf-8 -*-
|
123
|
+
require 'rubygems' # ruby 1.9系では不要
|
124
|
+
require 'mushikago'
|
125
|
+
|
126
|
+
client = Mushikago::Hanamgri::Client.new(:api_key => '<APIキー>', :secret_key => '<シークレットキー>')
|
127
|
+
|
128
|
+
# domain/create - ドメインを作成する
|
129
|
+
domain_name = "ec_products" # ドメイン名
|
130
|
+
seeds = "税込,送料" # キーワード
|
131
|
+
option = Hash.new
|
132
|
+
option[:description] = "ecサイト取得用のドメイン" # ドメインの説明文
|
133
|
+
option[:dictionary_name] = "mushikago/ecsite-20120530" # 使用したい辞書名
|
134
|
+
|
135
|
+
schema = Mushikago::Hanamgri::Schema.new do
|
136
|
+
add Mushikago::Hanamgri::Field.new('name', true, :string, "mushikago/ecsite/title-20120530")
|
137
|
+
add Mushikago::Hanamgri::Field.new('price', true, :number, "mushikago/ecsite/price-20120530")
|
138
|
+
end
|
139
|
+
|
140
|
+
name_knowledge = "mushikago/ecsite/title-20120530" # 使用したい学習データ名
|
141
|
+
price_knowledge = "mushikago/ecsite/price-20120530" # 使用したい学習データ名
|
142
|
+
|
143
|
+
schema = Mushikago::Hanamgri::Schema.new # 抽出データ定義
|
144
|
+
schema.add Mushikago::Hanamgri::Field.new('name', true, :string, name_knowledge) # 商品名を抽出するための定義
|
145
|
+
schema.add Mushikago::Hanamgri::Field.new('price', true, :number, price_knowledge) # 価格を抽出するための定義
|
146
|
+
schema.add Mushikago::Hanamgri::Field.new('detailed_features', false, :string) # 商品詳細を抽出するための定義
|
147
|
+
|
148
|
+
client.create_domain domain_name, seeds, schema, option
|
149
|
+
|
150
|
+
# domain/list - ドメインの一覧を取得する
|
151
|
+
option = Hash.new
|
152
|
+
option[:limit] = 3
|
153
|
+
option[:offset] = 3
|
154
|
+
option[:filter] = 'ec'
|
155
|
+
|
156
|
+
response = client.list_domains option
|
157
|
+
|
158
|
+
response['domains'].each do |domain|
|
159
|
+
puts domain['domain_name']
|
160
|
+
puts domain['description']
|
161
|
+
puts domain['updated_at']
|
162
|
+
end
|
163
|
+
puts response['total']
|
164
|
+
|
165
|
+
# domain/info - ドメインの情報を取得する
|
166
|
+
response = client.get_information domain_name
|
167
|
+
|
168
|
+
puts response['domain_name']
|
169
|
+
puts response['description']
|
170
|
+
puts response['created_at']
|
171
|
+
puts response['updated_at']
|
172
|
+
puts response['seeds']
|
173
|
+
puts response['schema']
|
174
|
+
|
175
|
+
# domain/update - ドメインの説明文を更新する
|
176
|
+
description = "このドメインは、家電ECサイト用のドメインです。" # ドメインの説明文
|
177
|
+
client.update_domain domain_name, description
|
178
|
+
|
179
|
+
# domain/get_queue_size - ドメインの未完了のタスク数を取得する
|
180
|
+
response = client.get_queue_size domain_name
|
181
|
+
|
182
|
+
puts response['queue_size']
|
183
|
+
|
184
|
+
# domain/train - キーワード学習と構造学習(教師付き学習)をさせる
|
185
|
+
domain_name = "ec_products" # ドメイン名
|
186
|
+
url = "http://ec.electronics.dummy/about" # urlもしくはhtml形式のテキスト
|
187
|
+
# 上記のurlはダミーです
|
188
|
+
|
189
|
+
# 構造学習のための教師データ
|
190
|
+
training_data = Mushikago::Hanamgri::TrainingData.new do
|
191
|
+
put("name", "サイクロンクリーナー")
|
192
|
+
put("price", "11,635円")
|
193
|
+
put('detailed_features', '遠心力でごみを分離するすごい掃除機')
|
194
|
+
end
|
195
|
+
|
196
|
+
client.train domain_name, url, training_data
|
197
|
+
|
198
|
+
# analysis/request - 解析を実行する
|
199
|
+
domain_name = "ec_products" # ドメイン名
|
200
|
+
url = "http://shop.example.com/" # 解析対象URL
|
201
|
+
# 上記のurlはダミーです
|
202
|
+
option = { :tag => "サイト1" } # 解析結果を識別するためのタグ
|
203
|
+
|
204
|
+
response = client.request_analysis domain_name, url, option
|
205
|
+
|
206
|
+
puts response['request_id']
|
207
|
+
|
208
|
+
# analysis/list - 解析結果の一覧を取得する
|
209
|
+
domain_name = "ec_products" # ドメイン名
|
210
|
+
|
211
|
+
option = Hash.new
|
212
|
+
option[:limit] = 3 # 最大取得件数
|
213
|
+
option[:offset] = 3 # 開始位置
|
214
|
+
option[:filter] = 'サイト1' # 検索文字(tagに対して先頭一致)
|
215
|
+
option[:status] = 'complete' # 解析の状態
|
216
|
+
|
217
|
+
response = client.list_analyses domain_name, option
|
218
|
+
|
219
|
+
response['analyses'].each do |analysis|
|
220
|
+
puts analysis['request_id'] # 解析結果のリクエストID
|
221
|
+
puts analysis['save_url'] # 解析結果(blocks)の保存先url
|
222
|
+
puts analysis['status'] # 現在の解析の状態
|
223
|
+
puts analysis['updated_at'] # 解析情報の更新日
|
224
|
+
puts analysis['tag'] # 解析結果に設定されているtag
|
225
|
+
end
|
226
|
+
puts response['total'] # 解析情報の合計数が返ってきます
|
227
|
+
|
228
|
+
# analysis/get - 解析結果を取得する
|
229
|
+
domain_name = "ec_products" # ドメイン名
|
230
|
+
request_id = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" # リクエストID
|
231
|
+
|
232
|
+
response = client.get_analysis domain_name, request_id
|
233
|
+
|
234
|
+
puts response['analysis_data']['blocks'] # 解析により抽出されたHTMLの配列
|
235
|
+
puts response['analysis_data']['results'] # 解析によりblocksから抽出された結果の配列
|
236
|
+
|
237
|
+
# dictionary/save - ブロック抽出用辞書を保存する
|
238
|
+
option = Hash.new
|
239
|
+
option[:description] = "家電ecサイト用" # 辞書の説明
|
240
|
+
|
241
|
+
client.save_dictionary domain_name, option
|
242
|
+
|
243
|
+
# dictionary/list - ブロック抽出用辞書の一覧を取得する
|
244
|
+
option = Hash.new
|
245
|
+
option[:limit] = 3 # 最大取得件数
|
246
|
+
option[:offset] = 3 # 開始位置
|
247
|
+
|
248
|
+
response = client.list_dictionaries option
|
249
|
+
|
250
|
+
# ブロック抽出用辞書の情報が入った配列
|
251
|
+
response['dictionaries'].each do |dictionary|
|
252
|
+
puts dictionary['dictionary_name'] # 辞書名
|
253
|
+
puts dictionary['description'] # 辞書の説明
|
254
|
+
puts dictionary['created_at'] # 辞書の作成日
|
255
|
+
end
|
256
|
+
puts response['total'] # 辞書の合計数が返ってきます
|
257
|
+
|
258
|
+
# dictionary/delete - 保存したブロック抽出用辞書を削除する
|
259
|
+
dictionary_name = "xxxxx-xxxxxxxxx-xxxx-xxxx-xxxxx-xxxxxxxxxxxx" # 辞書名
|
260
|
+
|
261
|
+
client.delete_dictionary dictionary_name
|
262
|
+
|
263
|
+
# knowledge/save - パラメータ抽出のための学習データを保存する
|
264
|
+
field_name = "name" # フィールド名
|
265
|
+
option = Hash.new
|
266
|
+
option[:description] = "家電用ecサイトの商品名" # 保存する学習データの説明文
|
267
|
+
|
268
|
+
client.save_knowledge domain_name, field_name, option
|
269
|
+
|
270
|
+
# knowledge/list - パラメータ抽出のための学習データの一覧を取得する
|
271
|
+
option = Hash.new
|
272
|
+
option[:limit] = 3 # 最大取得件数
|
273
|
+
option[:offset] = 3 # 開始位置
|
274
|
+
option[:status] = 'complete' # 学習データ保存の進捗状況
|
275
|
+
|
276
|
+
response = client.list_knowledges option
|
277
|
+
|
278
|
+
# 保存した学習データの情報が入った配列
|
279
|
+
response['knowledges'].each do |knowledge|
|
280
|
+
puts knowledge['knowledge_name'] # 学習データ名
|
281
|
+
puts knowledge['description'] # 学習データの説明文
|
282
|
+
puts knowledge['status'] # 学習データ保存の進捗状況
|
283
|
+
puts knowledge['created_at'] # 学習データの作成日
|
284
|
+
end
|
285
|
+
puts response['total'] # ヒットした学習データの総数
|
286
|
+
|
287
|
+
# knowledge/delete - 保存したパラメータ抽出のための学習データを削除する
|
288
|
+
knowledge_name = "xxxxxxxxx/xxxxx/xxxxx-xxxxxxxxx-xxxxx-xxxx-xxxx-xxxxxxxxxxxx" # 学習データ名
|
289
|
+
|
290
|
+
client.delete_knowledge knowledge_name
|
291
|
+
|
292
|
+
# domain/delete - 作成したドメインを削除する
|
293
|
+
domain_name = "ec_products" # ドメイン名
|
294
|
+
|
295
|
+
client.delete_domain domain_name
|
118
296
|
|
119
297
|
#### APIキーとシークレットキーの設定方法
|
120
298
|
|
@@ -156,6 +334,10 @@ $ export MUSHIKAGO_SECRET_KEY=<シークレットキー>
|
|
156
334
|
変更履歴
|
157
335
|
--------
|
158
336
|
|
337
|
+
- **Oct.25.12**: 2.4.3 release
|
338
|
+
- http_fetch/pushに:encodeオプション追加
|
339
|
+
- **Sep.14.12**: 2.4.2 release
|
340
|
+
- mitsubachiのproject_infoのオプション変更に対応
|
159
341
|
- **Aug.27.12**: 2.4.1 release
|
160
342
|
- tomboのcapturesにstateがない件を修正
|
161
343
|
- **Aug.20.12**: 2.4 release
|
@@ -39,9 +39,10 @@ module Mushikago
|
|
39
39
|
param ? param[1] : nil
|
40
40
|
end
|
41
41
|
|
42
|
+
|
42
43
|
# @return [String] URLエンコードされ、&で接続されたパラメータの文字列
|
43
44
|
def url_encoded_params
|
44
|
-
params.sort.select{|p| p[1].kind_of?(String)}.collect{|pp| pp.map{|p|
|
45
|
+
params.sort.select{|p| p[1].kind_of?(String)}.collect{|pp| pp.map{|p| encoding p}.join('=')}.join('&')
|
45
46
|
end
|
46
47
|
|
47
48
|
# HTTPリクエストオブジェクトに変換する
|
@@ -67,7 +68,7 @@ module Mushikago
|
|
67
68
|
# @param [String] s URLエンコード対象の文字列
|
68
69
|
# @return [String] URLエンコードされた文字列
|
69
70
|
private
|
70
|
-
def
|
71
|
+
def encoding s
|
71
72
|
CGI.escape(s).gsub('+', '%20')
|
72
73
|
end
|
73
74
|
|
@@ -157,6 +157,7 @@ module Mushikago
|
|
157
157
|
# @option options [String] :group_id
|
158
158
|
# @option options [String] :unique_key
|
159
159
|
# @option options [Integer] :unique_key_expires
|
160
|
+
# @option options [String] :encode
|
160
161
|
# @example
|
161
162
|
# client.http_fetch('project01', 'http://www.tombo.ne.jp/', 'sample.rb', :follow_redirect => true)
|
162
163
|
# @return [Mushikago::Http::Response] リクエストの結果
|
@@ -182,6 +183,7 @@ module Mushikago
|
|
182
183
|
# @option options [String] :group_id
|
183
184
|
# @option options [String] :unique_key
|
184
185
|
# @option options [Integer] :unique_key_expires
|
186
|
+
# @option options [String] :encode
|
185
187
|
# @example
|
186
188
|
# client.http_push('project01', 'http://www.tombo.ne.jp/', 'sample.rb')
|
187
189
|
# @return [Mushikago::Http::Response] リクエストの結果
|
@@ -16,6 +16,7 @@ module Mushikago
|
|
16
16
|
request_parameter :group_id
|
17
17
|
request_parameter :unique_key
|
18
18
|
request_parameter :unique_key_expires do |v| v.to_s end
|
19
|
+
request_parameter :encode
|
19
20
|
|
20
21
|
def initialize project_name, url, script_name, options={}
|
21
22
|
super(options)
|
@@ -32,6 +33,7 @@ module Mushikago
|
|
32
33
|
self.group_id = options[:group_id] if options.has_key?(:group_id)
|
33
34
|
self.unique_key = options[:unique_key] if options.has_key?(:unique_key)
|
34
35
|
self.unique_key_expires = options[:unique_key_expires] if options.has_key?(:unique_key_expires)
|
36
|
+
self.encode = options[:encode] if options.has_key?(:encode)
|
35
37
|
end
|
36
38
|
end
|
37
39
|
end
|
@@ -17,6 +17,7 @@ module Mushikago
|
|
17
17
|
request_parameter :group_id
|
18
18
|
request_parameter :unique_key
|
19
19
|
request_parameter :unique_key_expires do |v| v.to_s end
|
20
|
+
request_parameter :encode
|
20
21
|
|
21
22
|
def initialize project_name, url, script_name, file_name, file_input_key, options={}
|
22
23
|
super(options)
|
@@ -34,6 +35,7 @@ module Mushikago
|
|
34
35
|
self.group_id = options[:group_id] if options.has_key?(:group_id)
|
35
36
|
self.unique_key = options[:unique_key] if options.has_key?(:unique_key)
|
36
37
|
self.unique_key_expires = options[:unique_key_expires] if options.has_key?(:unique_key_expires)
|
38
|
+
self.encode = options[:encode] if options.has_key?(:encode)
|
37
39
|
end
|
38
40
|
end
|
39
41
|
end
|
data/lib/mushikago/version.rb
CHANGED
@@ -11,4 +11,12 @@ describe Mushikago::Mitsubachi::HttpFetchRequest do
|
|
11
11
|
request.cookiejar.should == '[{"name":"name","value":"value","domain":"domain","path":"path","secure":true}]'
|
12
12
|
end
|
13
13
|
end
|
14
|
+
|
15
|
+
context 'encodeのテスト' do
|
16
|
+
it 'requestクラスにencodeのパラメータが指定されていること' do
|
17
|
+
encode = 'sjis'
|
18
|
+
request = Mushikago::Mitsubachi::HttpFetchRequest.new('p','u','s',:encode => encode)
|
19
|
+
request.encode.should == encode
|
20
|
+
end
|
21
|
+
end
|
14
22
|
end
|
@@ -11,5 +11,13 @@ describe Mushikago::Mitsubachi::HttpPushRequest do
|
|
11
11
|
request.cookiejar.should == '[{"name":"name","value":"value","domain":"domain","path":"path","secure":true}]'
|
12
12
|
end
|
13
13
|
end
|
14
|
+
|
15
|
+
context 'encodeのテスト' do
|
16
|
+
it 'requestクラスにencodeのパラメータが指定されていること' do
|
17
|
+
encode = 'sjis'
|
18
|
+
request = Mushikago::Mitsubachi::HttpPushRequest.new('p','u','s','f','k',:encode => encode)
|
19
|
+
request.encode.should == encode
|
20
|
+
end
|
21
|
+
end
|
14
22
|
end
|
15
23
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mushikago-sdk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.4.2
|
4
|
+
version: 2.4.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-10-25 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: json
|
16
|
-
requirement: &
|
16
|
+
requirement: &2154699000 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2154699000
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: mime-types
|
27
|
-
requirement: &
|
27
|
+
requirement: &2154698560 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *2154698560
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: rake
|
38
|
-
requirement: &
|
38
|
+
requirement: &2154698140 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *2154698140
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: maruku
|
49
|
-
requirement: &
|
49
|
+
requirement: &2154697720 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *2154697720
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: yard
|
60
|
-
requirement: &
|
60
|
+
requirement: &2154697300 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: '0'
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *2154697300
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: rspec
|
71
|
-
requirement: &
|
71
|
+
requirement: &2154696800 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ~>
|
@@ -76,10 +76,10 @@ dependencies:
|
|
76
76
|
version: 2.6.0
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *2154696800
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
81
|
name: ci_reporter
|
82
|
-
requirement: &
|
82
|
+
requirement: &2154696380 !ruby/object:Gem::Requirement
|
83
83
|
none: false
|
84
84
|
requirements:
|
85
85
|
- - ! '>='
|
@@ -87,10 +87,10 @@ dependencies:
|
|
87
87
|
version: '0'
|
88
88
|
type: :development
|
89
89
|
prerelease: false
|
90
|
-
version_requirements: *
|
90
|
+
version_requirements: *2154696380
|
91
91
|
- !ruby/object:Gem::Dependency
|
92
92
|
name: ZenTest
|
93
|
-
requirement: &
|
93
|
+
requirement: &2154695920 !ruby/object:Gem::Requirement
|
94
94
|
none: false
|
95
95
|
requirements:
|
96
96
|
- - ! '>='
|
@@ -98,10 +98,10 @@ dependencies:
|
|
98
98
|
version: '0'
|
99
99
|
type: :development
|
100
100
|
prerelease: false
|
101
|
-
version_requirements: *
|
101
|
+
version_requirements: *2154695920
|
102
102
|
- !ruby/object:Gem::Dependency
|
103
103
|
name: bundler
|
104
|
-
requirement: &
|
104
|
+
requirement: &2154695500 !ruby/object:Gem::Requirement
|
105
105
|
none: false
|
106
106
|
requirements:
|
107
107
|
- - ! '>='
|
@@ -109,7 +109,7 @@ dependencies:
|
|
109
109
|
version: '0'
|
110
110
|
type: :development
|
111
111
|
prerelease: false
|
112
|
-
version_requirements: *
|
112
|
+
version_requirements: *2154695500
|
113
113
|
description: A SDK for Mushikago Web Service.
|
114
114
|
email:
|
115
115
|
- t.matsuoka@miningbrownie.co.jp
|