ocawari 0.9.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.ruby-version +1 -0
- data/.travis.yml +6 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +21 -0
- data/README.md +95 -0
- data/Rakefile +13 -0
- data/bin/console +11 -0
- data/bin/setup +8 -0
- data/exe/oca +51 -0
- data/lib/ocawari.rb +72 -0
- data/lib/ocawari/parser.rb +20 -0
- data/lib/ocawari/strategy/ameblo.rb +51 -0
- data/lib/ocawari/strategy/entame_clip.rb +18 -0
- data/lib/ocawari/strategy/gendai_business.rb +37 -0
- data/lib/ocawari/strategy/girls_news.rb +19 -0
- data/lib/ocawari/strategy/google_plus.rb +30 -0
- data/lib/ocawari/strategy/hustlepress.rb +18 -0
- data/lib/ocawari/strategy/imgur.rb +14 -0
- data/lib/ocawari/strategy/instagram.rb +37 -0
- data/lib/ocawari/strategy/kaiyou.rb +25 -0
- data/lib/ocawari/strategy/keyakizaka46.rb +23 -0
- data/lib/ocawari/strategy/line.rb +31 -0
- data/lib/ocawari/strategy/mantan_web.rb +27 -0
- data/lib/ocawari/strategy/mens_fashion.rb +20 -0
- data/lib/ocawari/strategy/modelpress.rb +25 -0
- data/lib/ocawari/strategy/nana_bun_no_nijuuni.rb +18 -0
- data/lib/ocawari/strategy/nana_go_go.rb +18 -0
- data/lib/ocawari/strategy/natalie.rb +18 -0
- data/lib/ocawari/strategy/news_dwango.rb +18 -0
- data/lib/ocawari/strategy/nikkan_sports.rb +22 -0
- data/lib/ocawari/strategy/no_match.rb +12 -0
- data/lib/ocawari/strategy/okmusicjp.rb +19 -0
- data/lib/ocawari/strategy/sirabee.rb +29 -0
- data/lib/ocawari/strategy/stereo_sound.rb +19 -0
- data/lib/ocawari/strategy/tokyo_idol_net.rb +14 -0
- data/lib/ocawari/strategy/tumblr.rb +58 -0
- data/lib/ocawari/strategy/tv_tokyo.rb +18 -0
- data/lib/ocawari/strategy/twitter.rb +29 -0
- data/lib/ocawari/strategy_delegator.rb +52 -0
- data/lib/ocawari/version.rb +3 -0
- data/ocawari.gemspec +45 -0
- metadata +342 -0
@@ -0,0 +1,19 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that collects image links from the anchors found inside the
    # `div#img_field` container of the parsed page.
    class GirlsNews < Parser

      private

      # Anchors wrapping the large image and the entries of the small
      # image list.
      CSS_SELECTORS = [
        "div#img_field div.single_img_field_l a",
        "div#img_field ul li.single_img_field_s a"
      ]

      # Builds one URL per matched anchor by joining the anchor's `href`
      # onto a fixed host.
      #
      # NOTE(review): the host is stereosound.co.jp although this class is
      # named GirlsNews -- confirm that this is intended.
      #
      # @return [Array<String>]
      def parse
        anchors = page.css(CSS_SELECTORS.join(", "))
        anchors.map { |anchor| File.join("http://www.stereosound.co.jp", anchor["href"]) }
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that looks up the album path referenced in the fetched page,
    # fetches that album page and returns the `src` of every image on it.
    class GooglePlus < Parser
      # Matches a "u/<digits>/" segment, which is stripped from the URL
      # before the page is fetched.
      USER_IDENTIFIER = /u\/\d+\//

      # Fetches and parses the page at +uri+ (with any USER_IDENTIFIER
      # segment removed). An HTTP error leaves the page as nil.
      #
      # @param uri [#to_s] the URL of the post
      def initialize(uri)
        @uri =
          if USER_IDENTIFIER.match?(uri.to_s)
            Addressable::URI.parse(uri.to_s.sub(USER_IDENTIFIER, ""))
          else
            uri
          end

        # URI.open instead of Kernel#open: open-uri stopped hooking
        # Kernel#open for URLs in Ruby 3.0.
        @page = Nokogiri::HTML(URI.open(@uri).read)
      rescue OpenURI::HTTPError
        @page = nil
      end

      private

      # Returns the image URLs of the album referenced by the page, with a
      # `=w<digits>-h<digits>` size suffix rewritten to `=s0`.
      # Returns an empty array when the page could not be fetched or no
      # album path is present. Errors while fetching the album page itself
      # are not rescued here.
      #
      # @return [Array<String>]
      def parse
        return [] unless @page

        album_path = @page.to_html[/(\/photos\/\d+\/albums\/\d+)/, 1]
        return [] unless album_path

        album_url = File.join("https://plus.google.com", album_path)
        album_page = Nokogiri::HTML(URI.open(album_url).read)

        # Skip <img> nodes that carry no src attribute.
        sources = album_page.css("img").map { |img| img["src"] }.compact
        sources.map { |src| src.sub(/=w\d+-h\d+/, "=s0") }
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that returns the `src` of every `img.size-full` found inside
    # `div.post_content`.
    class Hustlepress < Parser

      private

      # Selectors of the image nodes to extract.
      CSS_SELECTORS = [
        "div.post_content img.size-full"
      ]

      # @return [Array<String, nil>] the `src` attribute of each matched image
      def parse
        selector = CSS_SELECTORS.join(", ")
        page.css(selector).map { |image| image["src"] }
      end
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that reads the `window._sharedData` JSON embedded in a post
    # page and returns the image URLs it lists.
    class Instagram < Parser
      # Fetches and parses the page at +uri+. A `taken-by=` query string is
      # dropped first. An HTTP error leaves the page as nil.
      #
      # @param uri [#query, #query=] URI object of the post (mutated in
      #   place when the query string is dropped)
      def initialize(uri)
        # The query string is never part of a URI's path, so it has to be
        # detected on, and removed from, the query component.
        uri.query = nil if uri.query.to_s.start_with?("taken-by=")

        @uri = uri
        # URI.open instead of Kernel#open: open-uri stopped hooking
        # Kernel#open for URLs in Ruby 3.0.
        @page = Nokogiri::HTML(URI.open(uri).read)
      rescue OpenURI::HTTPError
        @page = nil
      end

      private

      # Returns the `display_url` of every child of a multi-image post, or
      # the `src` of the last `display_resources` entry for a single-image
      # post. Returns an empty array when the page, the shared-data script
      # or the media entry is missing. JSON parse errors are not rescued.
      #
      # @return [Array<String>]
      def parse
        return [] unless page

        script_tag = page.css("script").find do |script|
          script.text.include?("window._sharedData")
        end
        return [] unless script_tag

        # Strip the JavaScript assignment and trailing semicolon so that
        # only the JSON document remains.
        shared_data = script_tag.text.
          sub("window._sharedData = ", "").
          sub(/;$/, "").
          yield_self { |raw| JSON.parse(raw) }

        media = shared_data.dig("entry_data", "PostPage", 0, "graphql", "shortcode_media")
        return [] unless media

        children = media.dig("edge_sidecar_to_children", "edges")
        return children.map { |edge| edge.dig("node", "display_url") } if children

        last_resource = Array(media["display_resources"]).last
        last_resource ? [last_resource["src"]] : []
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that collects the eyecatch image and the `img.size-full`
    # images of the article body, rewriting each to a `/press/img/<file>`
    # URL on the image's own host.
    class Kaiyou < Parser

      private

      # @return [Array<String>] one rewritten URL per image that has a `src`
      def parse
        # The eyecatch may be absent (at_css returns nil), so drop it
        # instead of dereferencing nil.
        image_nodes = [header_image, *content_images.to_a].compact
        sources = image_nodes.map { |img| img["src"] }.compact

        sources.map do |src|
          uri = Addressable::URI.parse(src)

          "#{uri.scheme}://#{uri.hostname}/press/img/#{uri.path.split("/").last}"
        end
      end

      # @return [Nokogiri::XML::Node, nil] the eyecatch image, if present
      def header_image
        page.at_css("div.m-article-eyecatch img")
      end

      # @return [Nokogiri::XML::NodeSet] full-size images of the article body
      def content_images
        page.css("div.m-article-main img.size-full")
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that returns every image inside `div.box-article`, with the
    # image's `src` joined onto the keyakizaka46.com host.
    class Keyakizaka46 < Parser
      # Fetches and parses the page at +uri+, sending
      # Ocawari::WINDOWS_CHROME_USER_AGENT as the User-Agent header.
      # An HTTP error leaves the page as nil.
      #
      # @param uri the URL of the article
      def initialize(uri)
        @uri = uri
        # URI.open instead of Kernel#open: open-uri stopped hooking
        # Kernel#open for URLs in Ruby 3.0.
        @page = Nokogiri::HTML(
          URI.open(uri, "User-Agent" => Ocawari::WINDOWS_CHROME_USER_AGENT).read
        )
      rescue OpenURI::HTTPError
        @page = nil
      end

      private

      # @return [Array<String>] one URL per image in the article box
      def parse
        page.css("div.box-article img").map do |img|
          File.join("http://www.keyakizaka46.com", img["src"])
        end
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that returns the `img.pict` images of the article body with
    # the small-size marker removed from their `src`.
    class Line < Parser

      private

      # Descendant selector: article body > inner body > picture.
      CSS_SELECTOR_HIERARCHY = [
        "div.article-body",
        "div.article-body-inner",
        "img.pict"
      ].join(" ")

      # @return [Array<String, nil>] one entry per matched image
      def parse
        page.css(CSS_SELECTOR_HIERARCHY).map { |node| strip_small_marker(node["src"]) }
      end

      # Removes a "-s." infix or a trailing "/small" from +source+.
      # Anything else, including nil, is returned unchanged.
      def strip_small_marker(source)
        if /-s\./.match?(source)
          source.sub("-s.", ".")
        elsif /\/small$/.match?(source)
          source.sub("/small", "")
        else
          source
        end
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that derives the URL of every photo of an article from the
    # main image URL and the photo count shown on the page.
    class MantanWeb < Parser

      private

      # Not referenced by #parse; kept so the constant stays available.
      CSS_SELECTORS = [
        "ul.newsbody__thumblist li.newsbody__thumb img"
      ]

      # Replaces the "001_size6" part of the main image URL with
      # "<NNN>_size10" for every photo number from 1 up to the count read
      # from `span.newsbody__photo-num`. Numbers are zero-padded to three
      # digits. Returns an empty array when the count or the main image is
      # missing from the page.
      #
      # @return [Array<String>]
      def parse
        count_node = page.at_css("span.newsbody__photo-num")
        main_image_node = page.at_css("div.newsbody__img img")
        return [] unless count_node && main_image_node

        main_image = main_image_node["src"]
        return [] unless main_image

        (1..count_node.text.to_i).map do |i|
          # format pads to at least three digits and keeps larger numbers
          # intact, so no index maps to nil.
          main_image.sub("001_size6", format("%03d_size10", i))
        end
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that returns the `src` of every image matching one of the
    # selectors below.
    class MensFashion < Parser

      private

      # Selectors of the image nodes to extract.
      CSS_SELECTORS = [
        "p img.size-full",
        "img.image_ll",
        "img.image_ls"
      ]

      # @return [Array<String, nil>] the `src` attribute of each matched image
      def parse
        matched_images = page.css(CSS_SELECTORS.join(", "))
        matched_images.map { |image| image["src"] }
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that returns every `img.outputthumb` of the page top and the
    # article, with the query string of each `src` replaced.
    class ModelPress < Parser

      private

      # Selectors of the thumbnail nodes to extract.
      CSS_SELECTORS = [
        "div#body-top img.outputthumb",
        "article.mdpr-article img.outputthumb"
      ]

      # @return [Array<String>] one URL per matched image
      def parse
        page.css(CSS_SELECTORS.join(", ")).map do |img|
          # Keep only the part in front of the first "?".
          base_link = img["src"].split("?").first

          # A width of 6000 makes Fastly return the largest image possible.
          "#{base_link}?width=6000&quality=100"
        end
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that returns every image inside `div.blog_main`, with the
    # image's `src` joined onto the blog host.
    class NanaBunNoNijuuni < Parser

      private

      # Selectors of the image nodes to extract.
      CSS_SELECTORS = [
        "div.blog_main img"
      ]

      # @return [Array<String>] one URL per matched image
      def parse
        blog_images = page.css(CSS_SELECTORS.join(","))
        blog_images.map { |image| File.join("http://blog.nanabunnonijyuuni.com", image["src"]) }
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that returns the `data-src` of the first image whose alt
    # text is "投稿画像" ("posted image").
    class NanaGoGo < Parser

      private

      # @return [Array<String, nil>] a one-element array holding the
      #   image's `data-src`, or an empty array when no such image exists
      def parse
        target_img = page.at("img[alt='投稿画像']")
        target_img ? [target_img["data-src"]] : []
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that reads the `data-bg` attribute of each thumbnail in the
    # article's image list and swaps its `fit_<w>x<h>` size token.
    class Natalie < Parser

      private

      # Selectors of the thumbnail nodes to extract.
      CSS_SELECTORS = [
        "div.NA_articleUnit ul.NA_imageList span.NA_thumb"
      ]

      # @return [Array<String>] one URL per thumbnail, with the first
      #   `fit_<digits>x<digits>` replaced by `fixw_730_hq`
      def parse
        thumbnails = page.css(CSS_SELECTORS.join(", "))
        thumbnails.map { |thumb| thumb["data-bg"].sub(/fit_\d+x\d+/, "fixw_730_hq") }
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that returns the images of `#js-fancy-slider-sub` with the
    # "lg_" file-name prefix removed from their `src`.
    class NewsDwango < Parser

      private

      # Selectors of the image nodes to extract.
      CSS_SELECTORS = [
        "#js-fancy-slider-sub img"
      ]

      # @return [Array<String>] one URL per slider image
      def parse
        slider_images = page.css(CSS_SELECTORS.join(","))
        slider_images.map { |image| image["src"].sub("/lg_", "/") }
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that extracts the image path from the inline
    # `background-image` style of each photo-area image and joins it onto
    # the nikkansports.com host.
    class NikkanSports < Parser

      private

      # Selectors of the image nodes to extract.
      CSS_SELECTORS = [
        "article#columnMain div.column-photo-area img"
      ]

      # @return [Array<String>] one absolute URL per image that carries a
      #   `style` attribute
      def parse
        # Skip nodes without a style attribute; there is nothing to extract.
        styles = page.css(CSS_SELECTORS.join(", ")).map { |image| image["style"] }.compact

        styles.map do |style|
          # Strip the CSS wrapper around the path: the leading
          # "background-image: url(" and everything from ");" onwards.
          path = style.
            sub("background-image: url(", "").
            sub(/\);.*$/, "")

          # The scheme is required for the result to be an absolute URL.
          # NOTE(review): https assumed -- confirm the preferred scheme.
          File.join("https://www.nikkansports.com", path)
        end
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that returns the `src` of the head image and of the
    # `img.main-text_image` images inside the centre page box.
    class OkMusicJP < Parser

      private

      # Selectors of the image nodes to extract.
      CSS_SELECTORS = [
        "div#page_frame div.center-page_box div.head-img_box img",
        "div#page_frame div.center-page_box img.main-text_image"
      ]

      # @return [Array<String, nil>] the `src` attribute of each matched image
      def parse
        combined_selector = CSS_SELECTORS.join(", ")
        page.css(combined_selector).map { |image| image["src"] }
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that prefers the entry gallery images (resized to the
    # largest known dimensions) and otherwise falls back to the
    # `img.size-large` images of the entry body.
    class Sirabee < Parser

      private

      LARGEST_KNOWN_WIDTH = 768
      LARGEST_KNOWN_HEIGHT = 512

      # @return [Array<String, nil>] gallery image URLs when the page has a
      #   gallery, otherwise the `src` of each large body image
      def parse
        return resized_gallery_sources if has_gallery_images?

        body_images = page.css("article.entryContent section.entryContentBody img.size-large")
        body_images.map { |image| image["src"] }
      end

      # Gallery image URLs with the first `-<3 digits>x<3 digits>` size
      # token replaced by the largest known width and height.
      def resized_gallery_sources
        largest_size = "-#{LARGEST_KNOWN_WIDTH}x#{LARGEST_KNOWN_HEIGHT}"
        gallery_images.map { |image| image["src"].sub(/-\d{3}x\d{3}/, largest_size) }
      end

      # @return [Boolean] whether the page contains any gallery image
      def has_gallery_images?
        gallery_images.any?
      end

      # @return [Nokogiri::XML::NodeSet] the image nodes of the entry gallery
      def gallery_images
        page.css("div.entryGallery div.entryGallery-item img")
      end
    end
  end
end
|