ocawari 0.9.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.ruby-version +1 -0
  4. data/.travis.yml +6 -0
  5. data/Gemfile +5 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +95 -0
  8. data/Rakefile +13 -0
  9. data/bin/console +11 -0
  10. data/bin/setup +8 -0
  11. data/exe/oca +51 -0
  12. data/lib/ocawari.rb +72 -0
  13. data/lib/ocawari/parser.rb +20 -0
  14. data/lib/ocawari/strategy/ameblo.rb +51 -0
  15. data/lib/ocawari/strategy/entame_clip.rb +18 -0
  16. data/lib/ocawari/strategy/gendai_business.rb +37 -0
  17. data/lib/ocawari/strategy/girls_news.rb +19 -0
  18. data/lib/ocawari/strategy/google_plus.rb +30 -0
  19. data/lib/ocawari/strategy/hustlepress.rb +18 -0
  20. data/lib/ocawari/strategy/imgur.rb +14 -0
  21. data/lib/ocawari/strategy/instagram.rb +37 -0
  22. data/lib/ocawari/strategy/kaiyou.rb +25 -0
  23. data/lib/ocawari/strategy/keyakizaka46.rb +23 -0
  24. data/lib/ocawari/strategy/line.rb +31 -0
  25. data/lib/ocawari/strategy/mantan_web.rb +27 -0
  26. data/lib/ocawari/strategy/mens_fashion.rb +20 -0
  27. data/lib/ocawari/strategy/modelpress.rb +25 -0
  28. data/lib/ocawari/strategy/nana_bun_no_nijuuni.rb +18 -0
  29. data/lib/ocawari/strategy/nana_go_go.rb +18 -0
  30. data/lib/ocawari/strategy/natalie.rb +18 -0
  31. data/lib/ocawari/strategy/news_dwango.rb +18 -0
  32. data/lib/ocawari/strategy/nikkan_sports.rb +22 -0
  33. data/lib/ocawari/strategy/no_match.rb +12 -0
  34. data/lib/ocawari/strategy/okmusicjp.rb +19 -0
  35. data/lib/ocawari/strategy/sirabee.rb +29 -0
  36. data/lib/ocawari/strategy/stereo_sound.rb +19 -0
  37. data/lib/ocawari/strategy/tokyo_idol_net.rb +14 -0
  38. data/lib/ocawari/strategy/tumblr.rb +58 -0
  39. data/lib/ocawari/strategy/tv_tokyo.rb +18 -0
  40. data/lib/ocawari/strategy/twitter.rb +29 -0
  41. data/lib/ocawari/strategy_delegator.rb +52 -0
  42. data/lib/ocawari/version.rb +3 -0
  43. data/ocawari.gemspec +45 -0
  44. metadata +342 -0
@@ -0,0 +1,19 @@
module Ocawari
  module Strategy
    # Strategy that gathers the link targets of the anchors matched by
    # CSS_SELECTORS and prefixes each one with a fixed host.
    #
    # NOTE(review): the host used below is "www.stereosound.co.jp" even though
    # this class is named GirlsNews. This looks like a copy-paste from another
    # strategy; confirm the intended host before relying on the output.
    class GirlsNews < Parser
      # Anchors wrapping the large image and the small thumbnail images.
      CSS_SELECTORS = [
        "div#img_field div.single_img_field_l a",
        "div#img_field ul li.single_img_field_s a"
      ]

      private

      # Returns one URL string per matched anchor, built from its `href`.
      # `page` is presumably the parsed document supplied by Parser.
      def parse
        anchors = page.css(CSS_SELECTORS.join(", "))

        anchors.map { |anchor| File.join("http://www.stereosound.co.jp", anchor["href"]) }
      end
    end
  end
end
@@ -0,0 +1,30 @@
module Ocawari
  module Strategy
    # Strategy that locates the photo album referenced by a fetched page on
    # plus.google.com and returns the album's image URLs at original size.
    class GooglePlus < Parser
      # "u/<digits>/" segment that is stripped from the URI before fetching.
      USER_IDENTIFIER = /u\/\d+\//

      # Captures the "/photos/<id>/albums/<id>" path embedded in the page HTML.
      ALBUM_PATH = %r{(/photos/\d+/albums/\d+)}

      # Fetches and parses the page for +uri+.
      #
      # @param uri [#to_s] the post URI; any USER_IDENTIFIER segment is removed
      #
      # On an HTTP error the page is set to nil instead of raising.
      def initialize(uri)
        @uri =
          if USER_IDENTIFIER.match?(uri.to_s)
            Addressable::URI.parse(uri.to_s.sub(USER_IDENTIFIER, ""))
          else
            uri
          end

        # URI.open is used instead of Kernel#open, which no longer opens
        # URLs on Ruby 3.0+.
        @page = Nokogiri::HTML(URI.open(@uri).read)
      rescue OpenURI::HTTPError
        @page = nil
      end

      private

      # Returns the album's image URLs with the "=w<N>-h<N>" size suffix
      # replaced by "=s0".
      #
      # Returns an empty array when the page could not be fetched, when no
      # album path is present in the page, or when fetching the album fails
      # with an HTTP error (mirroring how #initialize treats that error).
      #
      # @return [Array<String>]
      def parse
        return [] unless @page

        album_path = @page.to_html[ALBUM_PATH, 1]
        return [] unless album_path

        album_url = File.join("https://plus.google.com", album_path)
        album_page = Nokogiri::HTML(URI.open(album_url).read)

        # <img> nodes without a src attribute are skipped rather than crashing.
        sources = album_page.css("img").map { |img| img["src"] }.compact
        sources.map { |src| src.sub(/=w\d+-h\d+/, "=s0") }
      rescue OpenURI::HTTPError
        []
      end
    end
  end
end
@@ -0,0 +1,18 @@
module Ocawari
  module Strategy
    # Strategy that returns the `src` of every image matched by CSS_SELECTORS.
    class Hustlepress < Parser
      # Full-size images inside the post content container.
      CSS_SELECTORS = [
        "div.post_content img.size-full"
      ]

      private

      # Returns the `src` attribute of each matched <img> node.
      # `page` is presumably the parsed document supplied by Parser.
      def parse
        selector = CSS_SELECTORS.join(", ")

        page.css(selector).map { |node| node["src"] }
      end
    end
  end
end
@@ -0,0 +1,14 @@
module Ocawari
  module Strategy
    # Strategy that turns the zoom links of a post into "http:"-prefixed URLs.
    class Imgur < Parser
      private

      # Returns one string per `a.zoom` anchor under `div.post-images`,
      # formed by prepending "http:" to the anchor's `href`.
      # `page` is presumably the parsed document supplied by Parser.
      def parse
        page.css("div.post-images a.zoom").map do |anchor|
          "http:#{anchor["href"]}"
        end
      end
    end
  end
end
@@ -0,0 +1,37 @@
module Ocawari
  module Strategy
    # Strategy that reads the `window._sharedData` JSON embedded in a fetched
    # page and returns the display URLs of the post's media.
    class Instagram < Parser
      # Fetches and parses the page for +uri+.
      #
      # A "taken-by" parameter lives in the URI's query component, not in its
      # path, so the query is what gets cleared here (testing the path for
      # "?taken-by=" can never match on a parsed URI).
      #
      # @param uri [#query, #query=] a URI object; its query is cleared in
      #   place when it carries a taken-by parameter
      #
      # On an HTTP error the page is set to nil instead of raising.
      def initialize(uri)
        uri.query = nil if uri.query.to_s.include?("taken-by=")

        @uri = uri
        # URI.open is used instead of Kernel#open, which no longer opens
        # URLs on Ruby 3.0+.
        @page = Nokogiri::HTML(URI.open(uri).read)
      rescue OpenURI::HTTPError
        @page = nil
      end

      private

      # Returns the display URLs found in the shared-data JSON.
      #
      # When the media node has "edge_sidecar_to_children" edges, one URL per
      # edge is returned; otherwise the `src` of the last entry in
      # "display_resources" is returned. An empty array is returned when the
      # script tag, the media node, or the display resources are missing.
      #
      # @return [Array<String>]
      def parse
        script_tag = page.css("script").find do |script|
          script.text.include?("window._sharedData")
        end
        return [] unless script_tag

        # Strip the JavaScript assignment and trailing semicolon to leave JSON.
        raw_json = script_tag.text.
          sub("window._sharedData = ", "").
          sub(/;$/, "")

        media = JSON.parse(raw_json).
          dig("entry_data", "PostPage", 0, "graphql", "shortcode_media")
        return [] unless media

        edges = media.dig("edge_sidecar_to_children", "edges")
        return edges.map { |edge| edge.dig("node", "display_url") } if edges

        last_resource = Array(media["display_resources"]).last
        last_resource ? [last_resource["src"]] : []
      end
    end
  end
end
@@ -0,0 +1,25 @@
module Ocawari
  module Strategy
    # Strategy that collects the eyecatch image and the full-size article
    # images, and rewrites each source URL to point at "/press/img/<file>"
    # on the same scheme and host.
    class Kaiyou < Parser
      private

      # Returns the rewritten URL for every image that has a `src`.
      #
      # A missing eyecatch image, or an <img> without a `src` attribute, is
      # skipped instead of raising NoMethodError.
      #
      # @return [Array<String>]
      def parse
        image_nodes = ([header_image] + content_images.to_a).compact
        sources = image_nodes.map { |img| img["src"] }.compact

        sources.map do |src|
          uri = Addressable::URI.parse(src)

          # Keep only the file name and re-root it under /press/img/.
          "#{uri.scheme}://#{uri.hostname}/press/img/#{uri.path.split("/").last}"
        end
      end

      # First <img> inside the eyecatch container, or nil when absent.
      def header_image
        page.at_css("div.m-article-eyecatch img")
      end

      # All full-size <img> nodes inside the article body.
      def content_images
        page.css("div.m-article-main img.size-full")
      end
    end
  end
end
@@ -0,0 +1,23 @@
module Ocawari
  module Strategy
    # Strategy that fetches a page with a browser User-Agent and returns the
    # article images as URLs on www.keyakizaka46.com.
    class Keyakizaka46 < Parser
      # Fetches and parses the page for +uri+, sending
      # Ocawari::WINDOWS_CHROME_USER_AGENT as the User-Agent header.
      #
      # @param uri the page URI
      #
      # On an HTTP error the page is set to nil instead of raising.
      def initialize(uri)
        @uri = uri
        # URI.open is used instead of Kernel#open, which no longer opens
        # URLs on Ruby 3.0+.
        @page = Nokogiri::HTML(
          URI.open(uri, "User-Agent" => Ocawari::WINDOWS_CHROME_USER_AGENT).read
        )
      rescue OpenURI::HTTPError
        @page = nil
      end

      private

      # Returns each article image's `src` joined onto the site host.
      # <img> nodes without a `src` are skipped (File.join would raise on nil).
      #
      # @return [Array<String>]
      def parse
        sources = page.css("div.box-article img").map { |img| img["src"] }.compact

        sources.map { |src| File.join("http://www.keyakizaka46.com", src) }
      end
    end
  end
end
@@ -0,0 +1,31 @@
module Ocawari
  module Strategy
    # Strategy that returns article image URLs with their "small" markers
    # removed.
    class Line < Parser
      # Descendant selector for the images inside the article body.
      CSS_SELECTOR_HIERARCHY = %w(
        div.article-body
        div.article-body-inner
        img.pict
      ).join(" ")

      # Trailing "/small" path segment.
      SMALL_SUFFIX = /\/small$/

      private

      # Returns the rewritten `src` of every matched image.
      # <img> nodes without a `src` are skipped instead of yielding nil.
      #
      # @return [Array<String>]
      def parse
        sources = page.css(CSS_SELECTOR_HIERARCHY).map { |img| img["src"] }.compact

        sources.map { |src| strip_small_marker(src) }
      end

      # Removes the small-size marker from +src+.
      #
      # - a "-s." in the name becomes "."
      # - a trailing "/small" is dropped; the same anchored pattern is used for
      #   matching and replacing, so an earlier "/small" elsewhere in the URL
      #   is left untouched
      # - anything else is returned unchanged
      def strip_small_marker(src)
        case src
        when /-s\./
          src.sub("-s.", ".")
        when SMALL_SUFFIX
          src.sub(SMALL_SUFFIX, "")
        else
          src
        end
      end
    end
  end
end
@@ -0,0 +1,27 @@
module Ocawari
  module Strategy
    # Strategy that derives every gallery image URL from the main image URL
    # and the photo count shown on the page.
    class MantanWeb < Parser
      # Thumbnail selector; not used by #parse but kept because the constant
      # is publicly reachable.
      CSS_SELECTORS = [
        "ul.newsbody__thumblist li.newsbody__thumb img"
      ]

      private

      # Returns one URL per image index from 1 to the photo count, produced by
      # replacing "001_size6" in the main image URL with the zero-padded index
      # and "_size10".
      #
      # Returns an empty array when the photo-count element, the main image,
      # or its `src` is missing. Indexes are padded to at least three digits;
      # an index of 1000 or more is written out in full rather than yielding
      # nil.
      #
      # @return [Array<String>]
      def parse
        count_node = page.at_css("span.newsbody__photo-num")
        main_node = page.at_css("div.newsbody__img img")
        return [] unless count_node && main_node

        main_image = main_node["src"]
        return [] unless main_image

        (1..count_node.text.to_i).map do |index|
          main_image.sub("001_size6", format("%03d_size10", index))
        end
      end
    end
  end
end
@@ -0,0 +1,20 @@
module Ocawari
  module Strategy
    # Strategy that returns the `src` of every image matched by CSS_SELECTORS.
    class MensFashion < Parser
      # Full-size paragraph images plus the two `image_l*` classes.
      CSS_SELECTORS = [
        "p img.size-full",
        "img.image_ll",
        "img.image_ls"
      ]

      private

      # Returns the `src` attribute of each matched <img> node.
      # `page` is presumably the parsed document supplied by Parser.
      def parse
        selector = CSS_SELECTORS.join(", ")

        page.css(selector).map { |node| node["src"] }
      end
    end
  end
end
@@ -0,0 +1,25 @@
module Ocawari
  module Strategy
    # Strategy that returns thumbnail URLs with their query string replaced
    # by fixed width and quality parameters.
    class ModelPress < Parser
      # Thumbnails in the page header and inside the article.
      CSS_SELECTORS = [
        "div#body-top img.outputthumb",
        "article.mdpr-article img.outputthumb"
      ]

      private

      # Returns each matched image's `src`, cut at the first "?", with
      # "?width=6000&quality=100" appended.
      # `page` is presumably the parsed document supplied by Parser.
      def parse
        sources = page.css(CSS_SELECTORS.join(", ")).map { |node| node["src"] }

        sources.map do |src|
          base = src.split("?").first

          # width is 6000 to make Fastly return
          # the largest image possible
          "#{base}?width=6000&quality=100"
        end
      end
    end
  end
end
@@ -0,0 +1,18 @@
module Ocawari
  module Strategy
    # Strategy that returns blog images as URLs on
    # blog.nanabunnonijyuuni.com.
    class NanaBunNoNijuuni < Parser
      # Every image inside the main blog container.
      CSS_SELECTORS = [
        "div.blog_main img"
      ]

      private

      # Returns each matched image's `src` joined onto the blog host.
      # `page` is presumably the parsed document supplied by Parser.
      def parse
        nodes = page.css(CSS_SELECTORS.join(","))

        nodes.map { |node| File.join("http://blog.nanabunnonijyuuni.com", node["src"]) }
      end
    end
  end
end
@@ -0,0 +1,18 @@
module Ocawari
  module Strategy
    # Strategy that returns the `data-src` of the image whose alt text is
    # "投稿画像".
    class NanaGoGo < Parser
      private

      # Returns a one-element array holding the image's `data-src`, or an
      # empty array when the image or its `data-src` attribute is missing.
      #
      # The page is queried once with an attribute selector; an earlier
      # full scan of every <img> whose result was never used has been dropped.
      #
      # @return [Array<String>]
      def parse
        target_img = page.at("img[alt='投稿画像']")
        return [] unless target_img

        [target_img["data-src"]].compact
      end
    end
  end
end
@@ -0,0 +1,18 @@
module Ocawari
  module Strategy
    # Strategy that returns the background URLs of the thumbnail spans with
    # their "fit_<W>x<H>" token swapped for "fixw_730_hq".
    class Natalie < Parser
      # Thumbnail spans inside the article's image list.
      CSS_SELECTORS = [
        "div.NA_articleUnit ul.NA_imageList span.NA_thumb"
      ]

      private

      # Returns each matched node's `data-bg` value after the substitution.
      # `page` is presumably the parsed document supplied by Parser.
      def parse
        thumbs = page.css(CSS_SELECTORS.join(", "))

        thumbs.map { |thumb| thumb["data-bg"].sub(/fit_\d+x\d+/, "fixw_730_hq") }
      end
    end
  end
end
@@ -0,0 +1,18 @@
module Ocawari
  module Strategy
    # Strategy that returns slider image URLs with the first "/lg_" in each
    # replaced by "/".
    class NewsDwango < Parser
      # Images inside the sub slider element.
      CSS_SELECTORS = [
        "#js-fancy-slider-sub img"
      ]

      private

      # Returns each matched image's `src` after the substitution.
      # `page` is presumably the parsed document supplied by Parser.
      def parse
        slides = page.css(CSS_SELECTORS.join(","))

        slides.map { |slide| slide["src"].sub("/lg_", "/") }
      end
    end
  end
end
@@ -0,0 +1,22 @@
module Ocawari
  module Strategy
    # Strategy that reads the `background-image` URL from the inline style of
    # each photo-area image and returns it as an absolute URL.
    class NikkanSports < Parser
      # Images inside the column photo area.
      CSS_SELECTORS = [
        "article#columnMain div.column-photo-area img"
      ]

      # Host the extracted paths are joined onto. The scheme is included so
      # the result is a usable URL rather than a bare "www.…/path" string.
      BASE_URL = "https://www.nikkansports.com"

      # Captures the argument of `background-image: url(...)`, with or
      # without surrounding quotes and regardless of a trailing semicolon.
      BACKGROUND_IMAGE_URL = /background-image:\s*url\(\s*['"]?([^'")]+)/

      private

      # Returns one absolute URL per matched node that has a background-image
      # declaration in its inline style; nodes without one are skipped.
      #
      # @return [Array<String>]
      def parse
        styles = page.css(CSS_SELECTORS.join(", ")).map { |image| image["style"] }
        paths = styles.compact.map { |style| style[BACKGROUND_IMAGE_URL, 1] }.compact

        paths.map { |path| File.join(BASE_URL, path) }
      end
    end
  end
end
@@ -0,0 +1,12 @@
module Ocawari
  module Strategy
    # Strategy whose #execute always yields no results.
    class NoMatch
      # Accepts a URI argument and ignores it.
      def initialize(uri); end

      # @return [Array] always a new empty array
      def execute
        Array.new
      end
    end
  end
end
@@ -0,0 +1,19 @@
module Ocawari
  module Strategy
    # Strategy that returns the `src` of every image matched by CSS_SELECTORS.
    class OkMusicJP < Parser
      # Header image and in-text images inside the centre page box.
      CSS_SELECTORS = [
        "div#page_frame div.center-page_box div.head-img_box img",
        "div#page_frame div.center-page_box img.main-text_image"
      ]

      private

      # Returns the `src` attribute of each matched <img> node.
      # `page` is presumably the parsed document supplied by Parser.
      def parse
        selector = CSS_SELECTORS.join(", ")

        page.css(selector).map { |node| node["src"] }
      end
    end
  end
end
@@ -0,0 +1,29 @@
module Ocawari
  module Strategy
    # Strategy that returns gallery images rewritten to the largest known
    # dimensions, or the large in-body images when the page has no gallery.
    class Sirabee < Parser
      LARGEST_KNOWN_WIDTH = 768
      LARGEST_KNOWN_HEIGHT = 512

      # Images inside gallery items.
      GALLERY_SELECTOR = "div.entryGallery div.entryGallery-item img"

      # Large images inside the article body.
      BODY_SELECTOR = "article.entryContent section.entryContentBody img.size-large"

      private

      # Returns the image URLs for the page.
      #
      # With gallery images present, each `src` has its "-<3 digits>x<3 digits>"
      # size token replaced by the largest known dimensions. Otherwise the
      # `src` of each large in-body image is returned unchanged.
      #
      # The gallery is queried once and the result reused for both the
      # presence check and the mapping.
      #
      # @return [Array<String>]
      def parse
        gallery_images = page.css(GALLERY_SELECTOR)

        if gallery_images.any?
          largest = "-#{LARGEST_KNOWN_WIDTH}x#{LARGEST_KNOWN_HEIGHT}"
          gallery_images.map { |img| img["src"].sub(/-\d{3}x\d{3}/, largest) }
        else
          page.css(BODY_SELECTOR).map { |img| img["src"] }
        end
      end

      # Whether the page contains any gallery image. No longer called by
      # #parse; kept in case other code in the project relies on it.
      def has_gallery_images?
        page.css(GALLERY_SELECTOR).any?
      end
    end
  end
end