ruboty-ymcrawl 0.0.8 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ruboty/handlers/ymcrawl.rb +2 -9
- data/lib/ruboty/ymcrawl/main.rb +139 -133
- data/lib/ruboty/ymcrawl/version.rb +1 -1
- data/lib/ruboty/ymcrawl.rb +0 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a30b00f8ef3a0f7058f46fdd58206e0170d95592
|
4
|
+
data.tar.gz: 9526594621419c35b4e9ed73dbdd1051ea1536a0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2a1f83c1f5b8572579b8fa86451a1d9d3387e3bfeaca7174fa4ef78bff2dce68375a433ce2737e9464f583242e59eadc43acc564f256c30e76b0355139ec9ca2
|
7
|
+
data.tar.gz: 4a2909657f41bb3a11e90962e81c5ba966c6ac139dc9401785851503bdd14baf59462b3a97bfae3646d1f76b129e29213c3f12268a0e5ddca9d77e7a0b906693
|
@@ -11,7 +11,7 @@ module Ruboty
|
|
11
11
|
end
|
12
12
|
|
13
13
|
def get_crawl
|
14
|
-
@crawl = YMCrawl::Core.new if @crawl == nil
|
14
|
+
@crawl = Ruboty::YMCrawl::Core.new if @crawl == nil
|
15
15
|
@crawl
|
16
16
|
end
|
17
17
|
end
|
@@ -24,14 +24,6 @@ module Ruboty
|
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
27
|
-
class Test < Base
|
28
|
-
on(/test\z/i, name: "test", description: "Return test message")
|
29
|
-
|
30
|
-
def test(message)
|
31
|
-
message.reply("This is test!!")
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
27
|
class Crawl < Base
|
36
28
|
on(
|
37
29
|
/crawl ?(?<url>.+)?\z/i,
|
@@ -47,6 +39,7 @@ module Ruboty
|
|
47
39
|
end
|
48
40
|
|
49
41
|
def crawl(message)
|
42
|
+
puts "crawl start in handlers"
|
50
43
|
url = (message[:url] == nil) ? "-- please set url --" : message[:url]
|
51
44
|
begin
|
52
45
|
crawl = CrawlManager.instance.get_crawl
|
data/lib/ruboty/ymcrawl/main.rb
CHANGED
@@ -7,155 +7,161 @@ require 'find'
|
|
7
7
|
require 'kconv'
|
8
8
|
require 'json-schema'
|
9
9
|
|
10
|
-
module
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
10
|
+
module Ruboty
|
11
|
+
module YMCrawl
|
12
|
+
ORG_SETTING_FILE_PATH = "YMCrawlfile"
|
13
|
+
SETTING_FILE_PATH = "#{ORG_SETTING_FILE_PATH}"
|
14
|
+
SCHEMA_FILE_PATH = "YMCrawl_schema.json"
|
15
|
+
UPLOADER_SCHEMA_FILE_PATH = "uploader_schema.json"
|
16
|
+
SITE_JSON_PATH = "site.json"
|
17
|
+
|
18
|
+
class DataManager
|
19
|
+
|
20
|
+
include Singleton
|
21
|
+
|
22
|
+
def initialize
|
23
|
+
@setting = JSON.parse( File.open(SETTING_FILE_PATH).read)
|
24
|
+
puts "setting: #{@setting}"
|
25
|
+
puts "YMCrawlfile valid"
|
26
|
+
puts JSON::Validator.fully_validate(SCHEMA_FILE_PATH, @setting, :insert_defaults => true).to_s
|
27
|
+
@sites = get_sites_json(SITE_JSON_PATH)
|
28
|
+
File.write( SITE_JSON_PATH, JSON.unparse(@sites) ) unless FileTest.exist?(SITE_JSON_PATH)
|
29
|
+
puts "uploader valid"
|
30
|
+
puts "uploder data: #{get_uploader_data}"
|
31
|
+
puts JSON::Validator.fully_validate(UPLOADER_SCHEMA_FILE_PATH, get_uploader_data, :insert_defaults => true).to_s
|
32
|
+
end
|
33
|
+
|
34
|
+
# 各サイトごとの、画像取得のためのcssセレクタを記載したjsonをファイルから取得して返す
|
35
|
+
def get_sites_json(path)
|
36
|
+
path = FileTest.exist?(path) ? path : @setting["site_json"]
|
37
|
+
puts "reading site json file from #{path}"
|
38
|
+
JSON.parse( open(path).read)
|
39
|
+
end
|
40
|
+
|
41
|
+
# URLのドメインに合致するsite情報を返す
|
42
|
+
def get_current_uploder_info(url)
|
43
|
+
host = URI(url).host
|
44
|
+
# ハッシュのkeyがs[0],valueがs[1]に入る
|
45
|
+
@sites.each{ |s| return s[1] if s[1]["host"] == host }
|
46
|
+
return @sites["default"]
|
47
|
+
end
|
48
|
+
|
49
|
+
def update_access_token(uploader_name, access_token)
|
50
|
+
@setting["uploader"][uploader_name]["access_token"] = access_token if @setting["uploader"][uploader_name] != access_token
|
51
|
+
puts "setting: #{@setting}"
|
52
|
+
open(SETTING_FILE_PATH, 'w') do |io|
|
53
|
+
JSON.dump(@setting, io)
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
def get_setting; @setting end
|
58
|
+
def get_save_to; @setting["save_to"] end
|
59
|
+
def get_uploader_data; @setting["uploader"][get_save_to] end
|
60
|
+
def get_current_access_token; get_uploader_data["access_token"] end
|
61
|
+
def get_current_app_key; ENV["#{@setting["save_to"].upcase }_APP_KEY"] end
|
62
|
+
def get_current_app_secret; ENV["#{@setting["save_to"].upcase }_APP_SECRET"] end
|
63
|
+
end
|
64
|
+
|
65
|
+
class Core
|
66
|
+
def initialize
|
67
|
+
@data = DataManager.instance
|
68
|
+
if @data.get_save_to != "local"
|
69
|
+
@uploader = Uploader.new(@data.get_save_to, @data.get_current_app_key, @data.get_current_app_secret, @data.get_current_access_token)
|
70
|
+
end
|
67
71
|
end
|
68
|
-
end
|
69
72
|
|
70
|
-
|
73
|
+
def start(urls); upload crawl(urls) end
|
71
74
|
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
75
|
+
# 画像をクロールして保存する。保存したファイルのパスを返す。
|
76
|
+
def crawl(urls)
|
77
|
+
puts "in crawl"
|
78
|
+
ncrawler = Crawler.new(@data.get_setting["dst_dir"], @data.get_current_uploder_info(urls[0]), @data.get_setting["wait_time"])
|
79
|
+
urls.map{ |v| ncrawler.save_images(v) }
|
80
|
+
end
|
77
81
|
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
encode
|
91
|
-
|
82
|
+
# 画像を指定した先へアップロード
|
83
|
+
def upload(file_dirs)
|
84
|
+
puts "in upload"
|
85
|
+
setting = @data.get_setting
|
86
|
+
return nil if @data.get_save_to == "local"
|
87
|
+
|
88
|
+
@uploader.login(@data.get_current_access_token)
|
89
|
+
zip_paths = file_dirs.map{ |dir| zip_dir(dir) }
|
90
|
+
encode = (ENV["LANG"] == nil) ? "utf-8" : ENV["LANG"]
|
91
|
+
begin
|
92
|
+
file_dirs.each{ |dir| FileUtils::remove_entry_secure( dir.force_encoding(encode) ) }
|
93
|
+
rescue
|
94
|
+
if encode != "ascii-8bit"
|
95
|
+
encode = "ascii-8bit"
|
96
|
+
retry
|
97
|
+
end
|
92
98
|
end
|
99
|
+
share_paths = []
|
100
|
+
zip_paths.each do |path|
|
101
|
+
puts "uploading #{path} to dropbox"
|
102
|
+
put_result = @uploader.put([path])
|
103
|
+
File::delete(path)
|
104
|
+
share_paths << @uploader.get_share_link(put_result["path"])["url"]
|
105
|
+
end
|
106
|
+
return share_paths
|
93
107
|
end
|
94
|
-
share_paths = []
|
95
|
-
zip_paths.each do |path|
|
96
|
-
puts "uploading #{path} to dropbox"
|
97
|
-
put_result = @uploader.put([path])
|
98
|
-
File::delete(path)
|
99
|
-
share_paths << @uploader.get_share_link(put_result["path"])["url"]
|
100
|
-
end
|
101
|
-
return share_paths
|
102
|
-
end
|
103
108
|
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
109
|
+
# 指定されたディレクトリ以下のファイルをzipにする。返り値はzipのパス
|
110
|
+
def zip_dir(src)
|
111
|
+
dst = "#{src}.zip"
|
112
|
+
Zip::Archive.open(dst, Zip::CREATE) do |ar|
|
113
|
+
Dir.glob("#{src}/*").each do |item|
|
114
|
+
ar.add_file(item)
|
115
|
+
end
|
110
116
|
end
|
117
|
+
dst
|
111
118
|
end
|
112
|
-
dst
|
113
|
-
end
|
114
119
|
|
115
|
-
|
116
|
-
end
|
117
|
-
|
118
|
-
# ファイルをアップロードする先を抽象化したクラス
|
119
|
-
class Uploader
|
120
|
-
def initialize(name, app_key, app_secret, access_token = nil)
|
121
|
-
@name = name
|
122
|
-
@app_key = app_key
|
123
|
-
@app_secret = app_secret
|
124
|
-
@access_token = access_token
|
125
|
-
@c_uploader = create_uploader
|
120
|
+
def get_uploader; @uploader end
|
126
121
|
end
|
127
122
|
|
128
|
-
#
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
@
|
133
|
-
|
123
|
+
# ファイルをアップロードする先を抽象化したクラス
|
124
|
+
class Uploader
|
125
|
+
def initialize(name, app_key, app_secret, access_token = nil)
|
126
|
+
@name = name
|
127
|
+
@app_key = app_key
|
128
|
+
@app_secret = app_secret
|
129
|
+
@access_token = access_token
|
130
|
+
@c_uploader = create_uploader
|
134
131
|
end
|
135
|
-
raise ArgumentError("uploader #{@name} is not found")
|
136
|
-
end
|
137
132
|
|
138
|
-
|
133
|
+
# 引数に応じてアップロード先のインスタンスを返す
|
134
|
+
def create_uploader
|
135
|
+
return @c_uploader unless @c_uploader == nil
|
136
|
+
if @name == "dropbox"
|
137
|
+
@c_uploader = DropboxManager.new(@app_key, @app_secret)
|
138
|
+
return @c_uploader
|
139
|
+
end
|
140
|
+
raise ArgumentError("uploader #{@name} is not found")
|
141
|
+
end
|
139
142
|
|
140
|
-
|
141
|
-
@access_token = @c_uploader.get_access_token(auth_code)
|
142
|
-
end
|
143
|
+
def access_token?; @access_token != "" and @access_token != nil end
|
143
144
|
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
puts "---- access token isn't set when login!!!! ----" if token ==nil
|
148
|
-
@c_uploader.login(token)
|
149
|
-
end
|
145
|
+
def verify_auth_code(auth_code)
|
146
|
+
@access_token = @c_uploader.get_access_token(auth_code)
|
147
|
+
end
|
150
148
|
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
149
|
+
def login(token = nil)
|
150
|
+
@access_token = (token == nil) ? @access_token : token
|
151
|
+
puts "access token: #{@access_token}"
|
152
|
+
puts "---- access token isn't set when login!!!! ----" if token ==nil
|
153
|
+
@c_uploader.login(token)
|
154
|
+
end
|
156
155
|
|
157
|
-
|
158
|
-
|
159
|
-
|
156
|
+
def get_access_token_url
|
157
|
+
error = "---- YMCrawl publishing new access token url. But you already have access token. ----"
|
158
|
+
puts error if @access_token != nil and @access_token != ""
|
159
|
+
@c_uploader.get_auth_code_url
|
160
|
+
end
|
161
|
+
|
162
|
+
def get_name; @name end
|
163
|
+
def put(command) @c_uploader.put(command) end
|
164
|
+
def get_share_link(path) @c_uploader.get_share_link(path) end
|
165
|
+
end
|
160
166
|
end
|
161
|
-
end
|
167
|
+
end
|
data/lib/ruboty/ymcrawl.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruboty-ymcrawl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.8
|
4
|
+
version: 0.0.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- mpk
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-12-
|
11
|
+
date: 2014-12-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|