ruboty-ymcrawl 0.0.1 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ruboty/handlers/ymcrawl.rb +79 -6
- data/lib/ruboty/ymcrawl/crawler.rb +208 -0
- data/lib/ruboty/ymcrawl/dropbox.rb +60 -0
- data/lib/ruboty/ymcrawl/main.rb +161 -0
- data/lib/ruboty/ymcrawl/version.rb +1 -1
- data/lib/ruboty/ymcrawl.rb +4 -7
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f1fa7723d9cb543e8a2be4c47f7639f579798f33
|
4
|
+
data.tar.gz: 57d00ba352089fc46a5f337e525424b2eba68dc4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2a388ae8e594e16e25721f92a22dcf42d9845986b13d0880d1d9d7961edc69452a2ce88f0672ecf9a8122349cd0dd1acb7ad3088907c0702eda78863c24a16fb
|
7
|
+
data.tar.gz: c26de2ae35453342d9d9e63a83880df8ab4adb6d07e528728a845b2a60ee826dde6f2c69816f7e2652f8a6ac31621bf552d6c3a69fc6951f004776112c32d1fb
|
@@ -1,15 +1,88 @@
|
|
1
|
+
require_relative 'src/main'
|
2
|
+
require 'singleton'
|
3
|
+
|
1
4
|
module Ruboty
|
2
5
|
module Handlers
|
3
|
-
|
6
|
+
|
7
|
+
# Holds the single YMCrawl crawler core shared by every handler in this process.
class CrawlManager
  include Singleton

  def initialize
    @crawl = nil
  end

  # Returns the shared YMCrawl::Core, creating it lazily on first use.
  def get_crawl
    @crawl ||= YMCrawl::Core.new
  end
end
|
18
|
+
|
19
|
+
# Liveness handler: lets users check that the bot is responding at all.
class Hello < Base
  on(
    /hello\z/i,
    name: "hello",
    description: "Return hello"
  )

  # Replies with a fixed greeting.
  def hello(message)
    message.reply("hello!!")
  end
end
|
26
|
+
|
27
|
+
# Handler for "@bot crawl <url>": crawls images from the URL via YMCrawl and
# replies with the resulting zip file paths / share links.
class Crawl < Base
  on(
    /crawl ?(?<url>.+)?\z/i,
    name: "crawl",
    description: "crawl image"
  )

  # Instructions shown when no Dropbox access token is configured yet.
  # url: the authorization URL the user must visit.
  def get_access_token_message(url)
    return "You don't have access token.
1. Go to: #{url}
2. Click \"Allow\" (you might have to log in first).
3. reply to bot as \"@bot dropbox:auth (auth_code) \""
  end

  # Entry point for the crawl command. The :url named capture may be nil
  # when the user typed "crawl" with no argument.
  def crawl(message)
    url = (message[:url] == nil) ? "-- please set url --" : message[:url]
    begin
      crawl = CrawlManager.instance.get_crawl
      uploader = crawl.get_uploader

      # If the upload target is not local and no access token has been
      # obtained yet, point the user at the authorization URL and stop.
      if not uploader.access_token? and uploader.get_name != "local"
        message.reply( get_access_token_message( uploader.get_access_token_url ) )
        return nil
      end

      message.reply("rubot is crawling from #{url}")
      zip_paths = crawl.start([url])
      message.reply("get zip file => #{zip_paths}")
    rescue URI::InvalidURIError => ex
      puts ex
      message.reply("URL is invalid. please retry.")
    rescue => ex
      # Catch-all: report the failure to the user and ask for feedback.
      puts "error raise in Crawl.crawl"
      puts ex
      message.reply("Sorry, error occurred.")
      message.reply("Please feedback this error to niboshiporipori@gmail.com")
      # NOTE(review): replies with the exception object itself — presumably
      # relies on implicit to_s; confirm Ruboty accepts a non-String here.
      message.reply(ex)
    end
  end
end
|
68
|
+
|
69
|
+
# Handler for "@bot dropbox:auth <code>": exchanges a Dropbox OAuth auth
# code for an access token and persists it in the settings.
class VerifyAuthCode < Base
  on(
    /dropbox:auth ?(?<auth_code>.+)?\z/i,
    name: "verify_auth_code",
    description: "add access token by auth code"
  )

  # Verifies the captured auth code and stores the resulting access token.
  def verify_auth_code(message)
    auth_code = message[:auth_code] || "-- please set auth_code --"
    crawler = CrawlManager.instance.get_crawl
    uploader = crawler.get_uploader
    token = uploader.verify_auth_code(auth_code)
    YMCrawl::DataManager.instance.update_access_token(uploader.get_name, token)

    message.reply("You added access token!")
    message.reply("Try clawling again!")
  end
end
|
86
|
+
|
14
87
|
end
|
15
88
|
end
|
@@ -0,0 +1,208 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'kconv'
|
4
|
+
require 'addressable/uri'
|
5
|
+
require 'singleton'
|
6
|
+
|
7
|
+
module YMCrawl
|
8
|
+
# URLに関する処理をまとめたクラス
|
9
|
+
# Small URL helpers shared by the crawler.
class URLUtil
  # Returns the percent-normalized form of url (via Addressable) as a String.
  # Logs loudly when handed nil, since that indicates a selector bug upstream.
  def self.normalize_url(url)
    puts "---- URL is null in normalize_url!!!!!!!!!!!!! ----" if url.nil?
    Addressable::URI.parse(url).normalize.to_s
  end
end
|
15
|
+
|
16
|
+
# CSSセレクタを表すクラス
|
17
|
+
# Wraps a CSS selector string and offers small parsing helpers.
class Selector
  def initialize(css)
    @selector = css
  end

  # The raw selector text.
  def to_s
    @selector
  end

  # Returns the bare tag name of the selector's final element:
  # combinators are dropped, then pseudo-class / attribute / class / id
  # suffixes are stripped from the last part.
  def get_last_tag
    last_part = @selector.split(/\s|\+|>/).last
    last_part.split(/:|,|\[|\.|#/).first
  end
end
|
30
|
+
|
31
|
+
# ホストごとの処理を管理するクラス
|
32
|
+
# Per-host rate limiter: remembers when each host was last accessed and
# sleeps so successive requests to one host are at least @wait_time apart.
class HostManager
  include Singleton
  DEFAULT_WAIT_TIME = 2

  def initialize
    @host_list = {}
    @wait_time = DEFAULT_WAIT_TIME
  end

  # Overrides the minimum interval (seconds) between requests to one host.
  def set_wait_time(wait_time)
    @wait_time = wait_time
  end

  # Sleeps if the given URL's host was accessed less than @wait_time seconds
  # ago, then records the current time as the last access for that host.
  def wait(url)
    host = URI( URLUtil.normalize_url(url) ).host
    last_seen = @host_list[host]
    if last_seen
      elapsed = Time.now - last_seen
      puts "sleep: #{sleep(@wait_time - elapsed)}sec." if elapsed < @wait_time
    end
    @host_list[host] = Time.now
  end
end
|
52
|
+
|
53
|
+
# あるURLから取得できるHTMLドキュメントを抽象化したクラス
|
54
|
+
# Wraps the HTML document fetched from a single URL.
class Page
  # Raised when the document cannot be fetched (wraps OpenURI::HTTPError).
  class PageError < StandardError; end

  # Fetches and parses the document immediately; raises PageError on HTTP failure.
  def initialize(url)
    @url = url
    @doc = get_doc
  end

  # Returns the nodes matching the CSS selector, each wrapped as an Element.
  def search_elements(selector) @doc.css(selector).map{ |doc| Element.new(doc) } end

  private
  # Fetches @url (honoring the per-host rate limit) and parses it as UTF-8 HTML.
  # NOTE(review): Kernel#open on an external URL relies on open-uri; a value
  # starting with "|" would be treated as a command pipe — consider URI.open.
  def get_doc
    puts "get_doc from #{@url}"
    HostManager.instance.wait(@url)
    html = open(URLUtil.normalize_url(@url), "r:binary").read
    Nokogiri::HTML(html.toutf8, nil, 'utf-8')
  rescue OpenURI::HTTPError => ex
    puts "failed URL: #{@url}"
    puts "HTTP Error message: #{ex.message}"
    raise PageError.new(ex.message)
  end
end
|
77
|
+
|
78
|
+
# セレクタにより抽出されたPageの一部を表すクラス
|
79
|
+
# A single node extracted from a Page by a CSS selector.
class Element
  def initialize(doc)
    @doc = doc
  end

  # href attribute of the node (used when following intermediate links).
  def get_url
    @doc["href"]
  end

  # URL of the image this node points at: href for <a>, src for <img>.
  # Raises ArgumentError for any other tag.
  def get_image_url
    case @doc.name
    when "a"   then @doc["href"]
    when "img" then @doc["src"]
    else raise ArgumentError, "in Element"
    end
  end

  # Title for the image (title attribute for <img>, node text otherwise);
  # falls back to "noname" when absent.
  def get_image_title
    title = @doc.name == "img" ? @doc["title"] : @doc.content
    title.nil? ? "noname" : title
  end

  # Article title text.
  def get_title
    @doc.content
  end

  # Highest page index of the article (node text parsed as an integer).
  def get_page_index_max
    @doc.content.to_i
  end

  # Dispatches to the getter matching the requested target symbol.
  # Returns nil for an unknown target.
  def get_content(target)
    case target
    when :url            then get_url
    when :image          then get_image_url
    when :image_title    then get_image_title
    when :title          then get_title
    when :page_index_max then get_page_index_max
    end
  end
end
|
112
|
+
|
113
|
+
# 画像のスクレイピングを行うクラス
|
114
|
+
# Scrapes images from article pages according to per-site CSS selectors.
class Crawler
  INDEX_STR = "{index}" # placeholder in the site JSON where the page index goes

  # dir: destination root directory.
  # site_data: per-site config hash — "css" selector lists for :image,
  #   :image_title, :title, :page_index_max, plus "page_index_min" and an
  #   optional "next_page_appendix" template containing INDEX_STR.
  # wait_time: minimum seconds between requests to one host.
  def initialize(dir, site_data, wait_time)
    HostManager.instance.set_wait_time(wait_time)
    @selectors = {}
    @selectors[:image] = site_data["css"]["image"].map { |s| Selector.new(s) }
    @selectors[:image_title] = site_data["css"]["image_title"].map { |s| Selector.new(s) }
    @selectors[:title] = site_data["css"]["title"].map { |s| Selector.new(s) }
    @selectors[:page_index_max] = site_data["css"]["page_index_max"].map { |s| Selector.new(s) }
    @page_index_min = site_data["page_index_min"]
    @next_page_appendix = (site_data["next_page_appendix"] == nil) ? "" : site_data["next_page_appendix"]
    @dir = dir
  end

  # Crawls every page of the article at original_url and saves all images
  # matched by the :image selectors. Returns the destination directory path.
  # get_contents returns nil when a page fails to load (PageError), so every
  # result is defaulted to [] to avoid crashing on a single bad page.
  def save_images(original_url)
    titles = get_contents(original_url, :title) || []
    dst_dir = "#{@dir}/#{titles.first}"
    (@page_index_min..get_page_index_max(original_url)).each do |page_index|
      url = "#{original_url}#{get_next_page_appendix_with_index(page_index)}"
      image_urls = get_contents(url, :image) || []
      image_titles = get_contents(url, :image_title) || []
      image_urls.zip(image_titles) do |image_url, title|
        save_image(dst_dir, image_url, title) unless image_url == nil
      end
    end
    dst_dir
  end

  private
  # Returns a file name unique within dir, appending a numeric suffix if one
  # with the same name already exists.
  # Bug fix: org_name == nil previously crashed in File.extname even though
  # basename had a nil guard — the extension is now guarded the same way.
  def get_unique_name(dir, org_name)
    basename = (org_name == nil) ? "noname" : File.basename(org_name, '.*')
    ext = (org_name == nil) ? "" : File.extname(org_name)
    return "#{basename}#{ext}" unless FileTest.exist?("#{dir}/#{basename}#{ext}")
    index = 1
    retname = "#{basename}#{index}#{ext}"
    while FileTest.exist?("#{dir}/#{retname}")
      index += 1
      retname = "#{basename}#{index}#{ext}"
    end
    retname
  end

  # Downloads the image at url into dst_dir under a unique name derived from
  # title. On fetch failure the partially written file is removed.
  def save_image(dst_dir, url, title)
    puts "src: #{url}"
    # build the destination path
    filename = "#{title}#{File.extname(url)}"
    file_path = "#{dst_dir}/#{get_unique_name(dst_dir, filename)}"
    HostManager.instance.wait(url)
    # create the destination folder if it does not exist yet
    FileUtils.mkdir_p(dst_dir) unless FileTest.exist?(dst_dir)
    # write the image data
    begin
      open(file_path, 'wb') do |output|
        puts "dst: #{file_path}"
        open(URLUtil.normalize_url(url)) do |data|
          output.write(data.read)
        end
      end
    rescue # e.g. the remote file does not exist — drop the partial file
      puts "image not exist."
      File.delete file_path
    end
  end

  # Returns the string appended to the URL for a given page index
  # (the INDEX_STR placeholder in the template is replaced by the index).
  def get_next_page_appendix_with_index(index)
    return "" if @next_page_appendix == ""
    @next_page_appendix.gsub(INDEX_STR, index.to_s)
  end

  # Returns the article's highest page index, falling back to @page_index_min
  # when the site has no pagination or the selector found nothing usable.
  def get_page_index_max(url)
    # page_index_max selectors may be empty when there is no pagination,
    # so check the appendix first to avoid a pointless fetch.
    return @page_index_min if @next_page_appendix == ""
    page_index_max = get_contents(url, :page_index_max) || []
    return @page_index_min if page_index_max.length == 0
    (page_index_max.first.kind_of?(Integer)) ? page_index_max.first : @page_index_min
  end

  # Resolves the selector chain for target starting at url: every selector
  # except the last extracts links to follow; the last extracts the target
  # content. Returns nil when a page in the chain fails to load.
  def get_contents(url, target, nest = 0)
    selector = @selectors[target][nest]
    if nest >= (@selectors[target].length - 1)
      return Page.new(url).search_elements(selector.to_s).map{ |cn| cn.get_content(target) }
    end
    # run the next selector against each URL obtained at this level
    contents = Page.new(url).search_elements(selector.to_s).map{ |cn| cn.get_content(:url) }
    contents.map{ |c| get_contents(c, target, nest + 1) }.flatten
  rescue Page::PageError => ex
    puts "error in get_contents #{ex}"
    return nil
  end
end
|
208
|
+
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# Install this the SDK with "gem install dropbox-sdk"
|
2
|
+
require 'dropbox_sdk'
|
3
|
+
module YMCrawl
|
4
|
+
# Thin wrapper around the Dropbox SDK: OAuth2 no-redirect flow, file upload,
# and share-link retrieval.
class DropboxManager

  def initialize(app_key, app_sec)
    @app_key = app_key
    @app_sec = app_sec
    @client = nil
    @access_token = nil
  end

  # Creates the Dropbox client from an access token. Returns the token on
  # success and nil when Dropbox rejects it; a second call is a no-op that
  # returns the cached token.
  def login(arg_access_token = nil)
    if not @client.nil?
      puts "already logged in!"
      return @access_token
    end

    @access_token = arg_access_token
    begin
      @client = DropboxClient.new(@access_token)
      puts "account info: #{@client.account_info()}"
      return @access_token
    rescue DropboxError => ex
      puts "---- access token is invalid ----"
      return nil
    end
  end

  # Starts the OAuth2 no-redirect flow and returns the authorization URL the
  # user must visit.
  # NOTE(review): the nil check below runs before @web_auth is ever assigned,
  # so it always warns on the first call — looks like leftover debug output.
  def get_auth_code_url
    puts "web_auth is nil!!!!" if @web_auth == nil
    @web_auth = DropboxOAuth2FlowNoRedirect.new(@app_key, @app_sec)
    authorize_url = @web_auth.start()
  end

  # Exchanges the auth code from the flow for an access token
  # (first element of the finish() result).
  def get_access_token(auth_code)
    @web_auth.finish(auth_code)[0]
  end

  # Uploads command[0] (local path) under the remote name command[1]
  # (defaults to the local basename). Prints the upload metadata.
  def put(command)
    fname = command[0]

    # If the user didn't specify the file name, just use the name of the file on disk
    if command[1]
      new_name = command[1]
    else
      new_name = File.basename(fname)
    end

    # File.exist? — File.exists? is a deprecated alias removed in Ruby 3.2.
    if fname && !fname.empty? && File.exist?(fname) && (File.ftype(fname) == 'file') && File.stat(fname).readable?
      # This is where we call the Dropbox client
      pp @client.put_file(new_name, open(fname))
    else
      puts "couldn't find the file #{ fname }"
    end
  end

  # Returns share-link metadata for the given remote path.
  def get_share_link(path) @client.shares(path) end
end
|
60
|
+
end
|
@@ -0,0 +1,161 @@
|
|
1
|
+
require_relative 'crawler'
|
2
|
+
require_relative 'dropbox.rb'
|
3
|
+
require 'optparse'
|
4
|
+
require 'json'
|
5
|
+
require 'zipruby'
|
6
|
+
require 'find'
|
7
|
+
require 'kconv'
|
8
|
+
require 'json-schema'
|
9
|
+
|
10
|
+
module YMCrawl
|
11
|
+
ORG_SETTING_FILE_PATH = "YMCrawlfile"
|
12
|
+
SETTING_FILE_PATH = "#{ORG_SETTING_FILE_PATH}"
|
13
|
+
SCHEMA_FILE_PATH = "YMCrawl_schema.json"
|
14
|
+
UPLOADER_SCHEMA_FILE_PATH = "uploader_schema.json"
|
15
|
+
SITE_JSON_PATH = "site.json"
|
16
|
+
|
17
|
+
# Loads and persists YMCrawl settings (YMCrawlfile), the per-site selector
# data, and uploader credentials. Singleton: config is read once from disk.
class DataManager

  include Singleton

  def initialize
    # File.read avoids leaking the handle (was File.open(...).read).
    @setting = JSON.parse(File.read(SETTING_FILE_PATH))
    puts "YMCrawlfile valid"
    puts JSON::Validator.fully_validate(SCHEMA_FILE_PATH, @setting, :insert_defaults => true).to_s
    @sites = get_sites_json(SITE_JSON_PATH)
    File.write( SITE_JSON_PATH, JSON.unparse(@sites) ) unless FileTest.exist?(SITE_JSON_PATH)
    puts "uploader valid"
    puts JSON::Validator.fully_validate(UPLOADER_SCHEMA_FILE_PATH, get_uploader_data, :insert_defaults => true).to_s
  end

  # Reads the per-site CSS selector JSON. Falls back to the location named in
  # the settings ("site_json", possibly a remote URL served via open-uri).
  # Block form closes the handle (was open(path).read, which leaked it).
  def get_sites_json(path)
    path = FileTest.exist?(path) ? path : @setting["site_json"]
    puts "reading site json file from #{path}"
    open(path) { |f| JSON.parse(f.read) }
  end

  # Returns the site entry whose "host" matches the URL's host,
  # or the "default" entry when none matches.
  def get_current_uploder_info(url)
    host = URI(url).host
    # each entry s is a [key, value] pair
    @sites.each{ |s| return s[1] if s[1]["host"] == host }
    return @sites["default"]
  end

  # Stores a new access token for the uploader and rewrites the settings file.
  # Bug fix: the old guard compared the whole uploader hash to the token
  # (always unequal), so the token was never the thing being compared.
  def update_access_token(uploader_name, access_token)
    if @setting["uploader"][uploader_name]["access_token"] != access_token
      @setting["uploader"][uploader_name]["access_token"] = access_token
    end
    puts "setting: #{@setting}"
    open(SETTING_FILE_PATH, 'w') do |io|
      JSON.dump(@setting, io)
    end
  end

  def get_setting; @setting end
  def get_save_to; @setting["save_to"] end
  def get_uploader_data; @setting["uploader"][get_save_to] end
  def get_current_access_token; get_uploader_data["access_token"] end
  def get_current_app_key; ENV["#{@setting["save_to"].upcase }_APP_KEY"] end
  def get_current_app_secret; ENV["#{@setting["save_to"].upcase }_APP_SECRET"] end
end
|
61
|
+
|
62
|
+
# Orchestrates a full run: crawl images, zip the directories, upload the
# zips, and return the share links.
class Core
  def initialize
    @data = DataManager.instance
    # An uploader is only built when the target is not local.
    if @data.get_save_to != "local"
      @uploader = Uploader.new(@data.get_save_to, @data.get_current_app_key, @data.get_current_app_secret, @data.get_current_access_token)
    end
  end

  # Crawls the given URLs and uploads the results. Returns the share links,
  # or nil when saving locally (see upload).
  def start(urls); upload crawl(urls) end

  # Crawls and saves images. Returns the paths of the saved directories.
  # The site config is chosen from the first URL's host.
  def crawl(urls)
    ncrawler = Crawler.new(@data.get_setting["dst_dir"], @data.get_current_uploder_info(urls[0]), @data.get_setting["wait_time"])
    urls.map{ |v| ncrawler.save_images(v) }
  end

  # Zips each image directory, removes the originals, uploads every zip and
  # deletes it locally. Returns the share-link URLs, or nil for local saves.
  def upload(file_dirs)
    # NOTE(review): this local is never used afterwards — dead assignment?
    setting = @data.get_setting
    return nil if @data.get_save_to == "local"

    @uploader.login(@data.get_current_access_token)
    zip_paths = file_dirs.map{ |dir| zip_dir(dir) }
    encode = (ENV["LANG"] == nil) ? "utf-8" : ENV["LANG"]
    begin
      file_dirs.each{ |dir| FileUtils::remove_entry_secure( dir.force_encoding(encode) ) }
    rescue
      # If removal fails under the locale encoding, retry once as ascii-8bit.
      if encode != "ascii-8bit"
        encode = "ascii-8bit"
        retry
      end
    end
    share_paths = []
    zip_paths.each do |path|
      puts "uploading #{path} to dropbox"
      put_result = @uploader.put([path])
      File::delete(path)
      # NOTE(review): assumes put returns the upload metadata hash with a
      # "path" key — confirm against DropboxManager#put's return value.
      share_paths << @uploader.get_share_link(put_result["path"])["url"]
    end
    return share_paths
  end

  # Zips every file directly under src (zipruby). Returns the zip's path.
  def zip_dir(src)
    dst = "#{src}.zip"
    Zip::Archive.open(dst, Zip::CREATE) do |ar|
      Dir.glob("#{src}/*").each do |item|
        ar.add_file(item)
      end
    end
    dst
  end

  def get_uploader; @uploader end
end
|
117
|
+
|
118
|
+
# ファイルをアップロードする先を抽象化したクラス
|
119
|
+
# Abstracts the upload destination (currently only Dropbox is supported).
class Uploader
  def initialize(name, app_key, app_secret, access_token = nil)
    @name = name
    @app_key = app_key
    @app_secret = app_secret
    @access_token = access_token
    @c_uploader = create_uploader
  end

  # Builds (and memoizes) the concrete uploader for @name.
  # Raises ArgumentError for an unknown name.
  def create_uploader
    return @c_uploader unless @c_uploader == nil
    if @name == "dropbox"
      @c_uploader = DropboxManager.new(@app_key, @app_secret)
      return @c_uploader
    end
    # Bug fix: `raise ArgumentError("...")` called a nonexistent method and
    # produced a NoMethodError instead of the intended ArgumentError.
    raise ArgumentError, "uploader #{@name} is not found"
  end

  # True when an access token is present and non-empty.
  def access_token?; @access_token != "" and @access_token != nil end

  # Exchanges an OAuth auth code for an access token and stores it.
  def verify_auth_code(auth_code)
    @access_token = @c_uploader.get_access_token(auth_code)
  end

  # Logs in with the given token, falling back to the stored one.
  def login(token = nil)
    @access_token = (token == nil) ? @access_token : token
    puts "access token: #{@access_token}"
    puts "---- access token isn't set when login!!!! ----" if token == nil
    @c_uploader.login(token)
  end

  # URL the user must visit to obtain an auth code; warns when a token
  # already exists.
  def get_access_token_url
    error = "---- YMCrawl publishing new access token url. But you already have access token. ----"
    puts error if @access_token != nil and @access_token != ""
    @c_uploader.get_auth_code_url
  end

  def get_name; @name end
  def put(command) @c_uploader.put(command) end
  def get_share_link(path) @c_uploader.get_share_link(path) end
end
|
161
|
+
end
|
data/lib/ruboty/ymcrawl.rb
CHANGED
@@ -1,9 +1,6 @@
|
|
1
|
+
require "ruboty/handlers/ymcrawl"
|
2
|
+
require "ruboty/ymcrawl/crawler"
|
3
|
+
require "ruboty/ymcrawl/dropbox"
|
4
|
+
require "ruboty/ymcrawl/main"
|
1
5
|
require "ruboty/ymcrawl/version"
|
2
6
|
|
3
|
-
module Ruboty
|
4
|
-
module Ymcrawl
|
5
|
-
def self.hoge
|
6
|
-
puts "hello from ymcrawl hoge"
|
7
|
-
end
|
8
|
-
end
|
9
|
-
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruboty-ymcrawl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- mpk
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-12-
|
11
|
+
date: 2014-12-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -159,6 +159,9 @@ files:
|
|
159
159
|
- Rakefile
|
160
160
|
- lib/ruboty/handlers/ymcrawl.rb
|
161
161
|
- lib/ruboty/ymcrawl.rb
|
162
|
+
- lib/ruboty/ymcrawl/crawler.rb
|
163
|
+
- lib/ruboty/ymcrawl/dropbox.rb
|
164
|
+
- lib/ruboty/ymcrawl/main.rb
|
162
165
|
- lib/ruboty/ymcrawl/version.rb
|
163
166
|
- ruboty-ymcrawl.gemspec
|
164
167
|
- spec/ruboty/ymcrawl_spec.rb
|