ocawari 0.9.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.ruby-version +1 -0
- data/.travis.yml +6 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +21 -0
- data/README.md +95 -0
- data/Rakefile +13 -0
- data/bin/console +11 -0
- data/bin/setup +8 -0
- data/exe/oca +51 -0
- data/lib/ocawari.rb +72 -0
- data/lib/ocawari/parser.rb +20 -0
- data/lib/ocawari/strategy/ameblo.rb +51 -0
- data/lib/ocawari/strategy/entame_clip.rb +18 -0
- data/lib/ocawari/strategy/gendai_business.rb +37 -0
- data/lib/ocawari/strategy/girls_news.rb +19 -0
- data/lib/ocawari/strategy/google_plus.rb +30 -0
- data/lib/ocawari/strategy/hustlepress.rb +18 -0
- data/lib/ocawari/strategy/imgur.rb +14 -0
- data/lib/ocawari/strategy/instagram.rb +37 -0
- data/lib/ocawari/strategy/kaiyou.rb +25 -0
- data/lib/ocawari/strategy/keyakizaka46.rb +23 -0
- data/lib/ocawari/strategy/line.rb +31 -0
- data/lib/ocawari/strategy/mantan_web.rb +27 -0
- data/lib/ocawari/strategy/mens_fashion.rb +20 -0
- data/lib/ocawari/strategy/modelpress.rb +25 -0
- data/lib/ocawari/strategy/nana_bun_no_nijuuni.rb +18 -0
- data/lib/ocawari/strategy/nana_go_go.rb +18 -0
- data/lib/ocawari/strategy/natalie.rb +18 -0
- data/lib/ocawari/strategy/news_dwango.rb +18 -0
- data/lib/ocawari/strategy/nikkan_sports.rb +22 -0
- data/lib/ocawari/strategy/no_match.rb +12 -0
- data/lib/ocawari/strategy/okmusicjp.rb +19 -0
- data/lib/ocawari/strategy/sirabee.rb +29 -0
- data/lib/ocawari/strategy/stereo_sound.rb +19 -0
- data/lib/ocawari/strategy/tokyo_idol_net.rb +14 -0
- data/lib/ocawari/strategy/tumblr.rb +58 -0
- data/lib/ocawari/strategy/tv_tokyo.rb +18 -0
- data/lib/ocawari/strategy/twitter.rb +29 -0
- data/lib/ocawari/strategy_delegator.rb +52 -0
- data/lib/ocawari/version.rb +3 -0
- data/ocawari.gemspec +45 -0
- metadata +342 -0
@@ -0,0 +1,19 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that gathers the link targets of the anchors found inside the
    # page's `#img_field` container and returns them as absolute URL strings.
    #
    # `page` is not defined here; it is presumably provided by Parser.
    class GirlsNews < Parser
      # Anchors for the large image and for the small thumbnails.
      # (A bare `private` does not apply to constants, so this stays public.)
      CSS_SELECTORS = [
        "div#img_field div.single_img_field_l a",
        "div#img_field ul li.single_img_field_s a"
      ]

      private

      # @return [Array<String>] each matched anchor's `href` joined onto the
      #   base host.
      def parse
        anchors = page.css(CSS_SELECTORS.join(", "))
        anchors.map { |anchor| absolute_url(anchor["href"]) }
      end

      # NOTE(review): the base host is stereosound.co.jp although this class is
      # named GirlsNews - confirm this is intended and not a copy-paste slip.
      def absolute_url(href)
        File.join("http://www.stereosound.co.jp", href)
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy for Google+ posts: loads the post page, locates the photo album
    # path embedded in its HTML, fetches that album page and returns the image
    # URLs found there.
    class GooglePlus < Parser
      # Matches a "u/<digits>/" segment, which is removed from the URI before
      # the page is fetched.
      USER_IDENTIFIER = /u\/\d+\//

      # @param uri [#to_s] address of the post; may contain a "u/<n>/" segment.
      #   An HTTP error while fetching leaves @page as nil instead of raising.
      def initialize(uri)
        @uri =
          if USER_IDENTIFIER.match?(uri.to_s)
            Addressable::URI.parse(uri.to_s.sub(USER_IDENTIFIER, ""))
          else
            uri
          end

        # URI.open rather than Kernel#open: the latter no longer opens URLs on
        # Ruby 3.0+.
        @page = Nokogiri::HTML(URI.open(@uri).read)
      rescue OpenURI::HTTPError
        @page = nil
      end

      private

      # @return [Array<String>] album image URLs with the "=w<W>-h<H>" size
      #   suffix replaced by "=s0"; empty when the post HTML contains no
      #   album path.
      def parse
        album_path = @page.to_html[/(\/photos\/\d+\/albums\/\d+)/, 1]
        # Without this guard File.join would raise TypeError on nil.
        return [] unless album_path

        album_url = File.join("https://plus.google.com", album_path)
        album_page = Nokogiri::HTML(URI.open(album_url).read)

        # <img> tags lacking a src attribute are skipped rather than crashing.
        sources = album_page.css("img").map { |img| img["src"] }.compact
        sources.map { |src| src.sub(/=w\d+-h\d+/, "=s0") }
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that returns the `src` of every full-size image inside the
    # post content container.
    #
    # `page` is not defined here; it is presumably provided by Parser.
    class Hustlepress < Parser
      # (A bare `private` does not apply to constants, so this stays public.)
      CSS_SELECTORS = [
        "div.post_content img.size-full"
      ]

      private

      # @return [Array<String, nil>] the `src` attribute of each matched image.
      def parse
        matched = page.css(CSS_SELECTORS.join(", "))
        matched.map { |node| node["src"] }
      end
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy for Instagram posts: reads the JSON assigned to
    # `window._sharedData` in a <script> tag and returns the image URLs of the
    # post (all children of a multi-image post, or the last display resource
    # of a single-image post).
    class Instagram < Parser
      # @param uri [#query, #dup] parsed address of the post. A "taken-by"
      #   query is dropped before fetching. An HTTP error while fetching
      #   leaves @page as nil instead of raising.
      def initialize(uri)
        # Stored for consistency with the other strategies that override
        # initialize; the original did not set it.
        @uri = without_taken_by(uri)
        # URI.open rather than Kernel#open: the latter no longer opens URLs on
        # Ruby 3.0+.
        @page = Nokogiri::HTML(URI.open(@uri).read)
      rescue OpenURI::HTTPError
        @page = nil
      end

      private

      # Returns a copy of +uri+ without its query when the query carries a
      # "taken-by=" parameter; otherwise returns +uri+ untouched.
      #
      # The previous check looked for "?taken-by=" in `uri.path`, but a parsed
      # URI keeps the query separately from the path, so it never matched.
      def without_taken_by(uri)
        return uri unless uri.query.to_s.include?("taken-by=")

        cleaned = uri.dup
        cleaned.query = nil
        cleaned
      end

      # @return [Array<String>] image URLs; empty when the shared-data script
      #   or the post media entry cannot be found.
      def parse
        script_tag = page.css("script").find { |script| script.text.include?("window._sharedData") }
        return [] unless script_tag

        # Strip the JavaScript assignment and trailing semicolon so that only
        # the JSON literal is left.
        raw_json = script_tag.text.sub("window._sharedData = ", "").sub(/;$/, "")
        shared_data = JSON.parse(raw_json)

        media = shared_data.dig("entry_data", "PostPage", 0, "graphql", "shortcode_media")
        return [] unless media

        children = media.dig("edge_sidecar_to_children", "edges")
        if children
          children.map { |edge| edge.dig("node", "display_url") }
        else
          [media["display_resources"].last["src"]]
        end
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that collects the eyecatch image plus the full-size images of
    # the article body and rewrites each one to the "/press/img/<file>" path
    # on the same host.
    #
    # `page` is not defined here; it is presumably provided by Parser.
    class Kaiyou < Parser

      private

      # @return [Array<String>] rewritten image URLs, header image first.
      def parse
        # at_css returns nil when the article has no eyecatch image; compact
        # drops it so the loop below does not call #[] on nil.
        all_images = ([header_image] + content_images.to_a).compact

        all_images.map do |img|
          uri = Addressable::URI.parse(img["src"])

          "#{uri.scheme}://#{uri.hostname}/press/img/#{uri.path.split("/").last}"
        end
      end

      # @return [Nokogiri::XML::Node, nil] the eyecatch image, if any.
      def header_image
        page.at_css("div.m-article-eyecatch img")
      end

      # @return [Nokogiri::XML::NodeSet] full-size images in the article body.
      def content_images
        page.css("div.m-article-main img.size-full")
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that fetches the page with a browser User-Agent header and
    # returns the images inside the article box as URLs on keyakizaka46.com.
    class Keyakizaka46 < Parser
      # @param uri address of the article. An HTTP error while fetching leaves
      #   @page as nil instead of raising.
      def initialize(uri)
        @uri = uri
        # URI.open rather than Kernel#open: the latter no longer opens URLs on
        # Ruby 3.0+.
        @page = Nokogiri::HTML(
          URI.open(uri, {"User-Agent" => Ocawari::WINDOWS_CHROME_USER_AGENT}).read
        )

      rescue OpenURI::HTTPError
        @page = nil
      end

      private

      # @return [Array<String>] each image `src` joined onto the site host.
      def parse
        page.css("div.box-article img").map do |img|
          File.join("http://www.keyakizaka46.com", img["src"])
        end
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that returns the article-body pictures, rewriting the two
    # known "small" URL variants to the URL without the size marker.
    #
    # `page` is not defined here; it is presumably provided by Parser.
    class Line < Parser
      # Descendant selector: article body -> inner body -> picture.
      CSS_SELECTOR_HIERARCHY = %w(
        div.article-body
        div.article-body-inner
        img.pict
      ).join(" ").freeze

      private

      # @return [Array<String, nil>] one entry per matched image:
      #   - "...-s.<ext>" has its "-s" marker removed,
      #   - a URL ending in "/small" has that trailing segment removed,
      #   - anything else (including a missing src) is returned unchanged.
      def parse
        page.css(CSS_SELECTOR_HIERARCHY).map do |img|
          src = img["src"]

          case src
          when /-s\./
            src.sub("-s.", ".")
          when %r{/small$}
            # Anchor the replacement like the match, so an earlier "/small"
            # inside the path is not the one that gets removed.
            src.sub(%r{/small$}, "")
          else
            src
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that derives every photo URL of an article from the main image
    # URL and the photo counter shown on the page.
    #
    # `page` is not defined here; it is presumably provided by Parser.
    class MantanWeb < Parser
      # Not referenced by #parse in this class; kept because constants are
      # reachable from outside (a bare `private` does not apply to them).
      CSS_SELECTORS = [
        "ul.newsbody__thumblist li.newsbody__thumb img"
      ]

      private

      # Builds one URL per photo by replacing "001_size6" in the main image
      # URL with "<zero-padded index>_size10" for each index from 1 to the
      # count read from the page.
      #
      # @return [Array<String>] derived URLs; empty when the counter, the
      #   main image or its src is missing.
      def parse
        counter = page.at_css("span.newsbody__photo-num")
        main_node = page.at_css("div.newsbody__img img")
        return [] unless counter && main_node

        main_image = main_node["src"]
        return [] unless main_image

        (1..counter.text.to_i).map do |i|
          # %03d pads to at least three digits; unlike the former if/elsif
          # chain it also yields a value (not nil) for indexes of 1000 and up.
          main_image.sub("001_size6", format("%03d_size10", i))
        end
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that returns the `src` of every image matching one of the
    # known image selectors.
    #
    # `page` is not defined here; it is presumably provided by Parser.
    class MensFashion < Parser
      # (A bare `private` does not apply to constants, so this stays public.)
      CSS_SELECTORS = [
        "p img.size-full",
        "img.image_ll",
        "img.image_ls"
      ]

      private

      # @return [Array<String, nil>] the `src` attribute of each matched image.
      def parse
        combined_selector = CSS_SELECTORS.join(", ")
        page.css(combined_selector).map { |node| node["src"] }
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that returns the thumbnails of an article with their query
    # string replaced by one requesting a very wide, full-quality rendition.
    #
    # `page` is not defined here; it is presumably provided by Parser.
    class ModelPress < Parser
      # (A bare `private` does not apply to constants, so this stays public.)
      CSS_SELECTORS = [
        "div#body-top img.outputthumb",
        "article.mdpr-article img.outputthumb"
      ]

      private

      # @return [Array<String>] image URLs with a fixed width/quality query.
      def parse
        thumbnails = page.css(CSS_SELECTORS.join(", "))

        thumbnails.map do |img|
          # Keep only what precedes the first "?".
          base = img["src"].split("?").first

          # width is 6000 to make Fastly return
          # the largest image possible
          "#{base}?width=6000&quality=100"
        end
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that returns every image inside the blog's main container as a
    # URL on the blog host.
    #
    # `page` is not defined here; it is presumably provided by Parser.
    class NanaBunNoNijuuni < Parser
      # (A bare `private` does not apply to constants, so this stays public.)
      CSS_SELECTORS = [
        "div.blog_main img"
      ]

      private

      # @return [Array<String>] each image `src` joined onto the blog host.
      def parse
        blog_images = page.css(CSS_SELECTORS.join(","))
        blog_images.map { |node| File.join("http://blog.nanabunnonijyuuni.com", node["src"]) }
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that returns the `data-src` of the image whose alt text is
    # "投稿画像".
    #
    # `page` is not defined here; it is presumably provided by Parser.
    class NanaGoGo < Parser

      private

      # @return [Array<String, nil>] a one-element array holding the image's
      #   `data-src`, or an empty array when no such image exists.
      def parse
        # The former extra `page.css("img").find { ... }` lookup was assigned
        # to a local that was never read, so it has been removed.
        target_img = page.at("img[alt='投稿画像']")
        return [] unless target_img

        [target_img["data-src"]]
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that reads the `data-bg` attribute of each thumbnail span in
    # the article's image list and swaps its "fit_<W>x<H>" size token for
    # "fixw_730_hq".
    #
    # `page` is not defined here; it is presumably provided by Parser.
    class Natalie < Parser
      # (A bare `private` does not apply to constants, so this stays public.)
      CSS_SELECTORS = [
        "div.NA_articleUnit ul.NA_imageList span.NA_thumb"
      ]

      private

      # @return [Array<String>] rewritten `data-bg` values.
      def parse
        thumbs = page.css(CSS_SELECTORS.join(", "))
        thumbs.map { |thumb| thumb["data-bg"].sub(/fit_\d+x\d+/, "fixw_730_hq") }
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that returns the slider images with the first "/lg_" in each
    # `src` replaced by "/".
    #
    # `page` is not defined here; it is presumably provided by Parser.
    class NewsDwango < Parser
      # (A bare `private` does not apply to constants, so this stays public.)
      CSS_SELECTORS = [
        "#js-fancy-slider-sub img"
      ]

      private

      # @return [Array<String>] rewritten image URLs.
      def parse
        slider_images = page.css(CSS_SELECTORS.join(","))
        slider_images.map { |node| node["src"].sub("/lg_", "/") }
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that extracts the image path from the inline
    # `background-image: url(...)` style of each photo element and returns it
    # as a URL on www.nikkansports.com.
    #
    # `page` is not defined here; it is presumably provided by Parser.
    class NikkanSports < Parser
      # (A bare `private` does not apply to constants, so this stays public.)
      CSS_SELECTORS = [
        "article#columnMain div.column-photo-area img"
      ]

      private

      # @return [Array<String>] absolute image URLs.
      def parse
        page.css(CSS_SELECTORS.join(", ")).map do |image|
          # Drop the leading "background-image: url(" and everything from the
          # closing ");" onwards, leaving only the path.
          path = image["style"].
            sub("background-image: url(", "").
            sub(/\);.*$/, "")

          # The base previously had no scheme ("www.nikkansports.com/..."),
          # so the result was not a fetchable URL, unlike the other strategies.
          File.join("https://www.nikkansports.com", path)
        end
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that returns the `src` of the header image and of the images
    # embedded in the main text of the page.
    #
    # `page` is not defined here; it is presumably provided by Parser.
    class OkMusicJP < Parser
      # (A bare `private` does not apply to constants, so this stays public.)
      CSS_SELECTORS = [
        "div#page_frame div.center-page_box div.head-img_box img",
        "div#page_frame div.center-page_box img.main-text_image"
      ]

      private

      # @return [Array<String, nil>] the `src` attribute of each matched image.
      def parse
        combined_selector = CSS_SELECTORS.join(", ")
        page.css(combined_selector).map { |node| node["src"] }
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy that prefers the entry gallery: when gallery images exist their
    # "-<3 digits>x<3 digits>" size token is replaced with the largest known
    # size; otherwise the large images of the entry body are returned as-is.
    #
    # `page` is not defined here; it is presumably provided by Parser.
    class Sirabee < Parser
      # Dimensions substituted into gallery image URLs.
      LARGEST_KNOWN_WIDTH = 768
      LARGEST_KNOWN_HEIGHT = 512

      private

      # @return [Array<String>] image URLs from the gallery, or from the entry
      #   body when the page has no gallery images.
      def parse
        return body_image_sources unless has_gallery_images?

        largest_size = "-#{LARGEST_KNOWN_WIDTH}x#{LARGEST_KNOWN_HEIGHT}"
        gallery_images.map { |node| node["src"].sub(/-\d{3}x\d{3}/, largest_size) }
      end

      # @return [Boolean] whether the page contains any gallery image.
      def has_gallery_images?
        gallery_images.any?
      end

      # Images inside the entry gallery.
      def gallery_images
        page.css("div.entryGallery div.entryGallery-item img")
      end

      # `src` of each large image in the entry body.
      def body_image_sources
        body_images = page.css("article.entryContent section.entryContentBody img.size-large")
        body_images.map { |node| node["src"] }
      end
    end
  end
end
|