ocawari 0.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.ruby-version +1 -0
  4. data/.travis.yml +6 -0
  5. data/Gemfile +5 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +95 -0
  8. data/Rakefile +13 -0
  9. data/bin/console +11 -0
  10. data/bin/setup +8 -0
  11. data/exe/oca +51 -0
  12. data/lib/ocawari.rb +72 -0
  13. data/lib/ocawari/parser.rb +20 -0
  14. data/lib/ocawari/strategy/ameblo.rb +51 -0
  15. data/lib/ocawari/strategy/entame_clip.rb +18 -0
  16. data/lib/ocawari/strategy/gendai_business.rb +37 -0
  17. data/lib/ocawari/strategy/girls_news.rb +19 -0
  18. data/lib/ocawari/strategy/google_plus.rb +30 -0
  19. data/lib/ocawari/strategy/hustlepress.rb +18 -0
  20. data/lib/ocawari/strategy/imgur.rb +14 -0
  21. data/lib/ocawari/strategy/instagram.rb +37 -0
  22. data/lib/ocawari/strategy/kaiyou.rb +25 -0
  23. data/lib/ocawari/strategy/keyakizaka46.rb +23 -0
  24. data/lib/ocawari/strategy/line.rb +31 -0
  25. data/lib/ocawari/strategy/mantan_web.rb +27 -0
  26. data/lib/ocawari/strategy/mens_fashion.rb +20 -0
  27. data/lib/ocawari/strategy/modelpress.rb +25 -0
  28. data/lib/ocawari/strategy/nana_bun_no_nijuuni.rb +18 -0
  29. data/lib/ocawari/strategy/nana_go_go.rb +18 -0
  30. data/lib/ocawari/strategy/natalie.rb +18 -0
  31. data/lib/ocawari/strategy/news_dwango.rb +18 -0
  32. data/lib/ocawari/strategy/nikkan_sports.rb +22 -0
  33. data/lib/ocawari/strategy/no_match.rb +12 -0
  34. data/lib/ocawari/strategy/okmusicjp.rb +19 -0
  35. data/lib/ocawari/strategy/sirabee.rb +29 -0
  36. data/lib/ocawari/strategy/stereo_sound.rb +19 -0
  37. data/lib/ocawari/strategy/tokyo_idol_net.rb +14 -0
  38. data/lib/ocawari/strategy/tumblr.rb +58 -0
  39. data/lib/ocawari/strategy/tv_tokyo.rb +18 -0
  40. data/lib/ocawari/strategy/twitter.rb +29 -0
  41. data/lib/ocawari/strategy_delegator.rb +52 -0
  42. data/lib/ocawari/version.rb +3 -0
  43. data/ocawari.gemspec +45 -0
  44. metadata +342 -0
@@ -0,0 +1,19 @@
1
module Ocawari
  module Strategy
    # Collects the links wrapped around the images of a Girls News article.
    class GirlsNews < Parser
      # Anchors around the large lead image and around each small thumbnail.
      CSS_SELECTORS = [
        "div#img_field div.single_img_field_l a",
        "div#img_field ul li.single_img_field_s a"
      ].freeze

      # Host that the anchors' href values are joined onto.
      # The previous value was "http://www.stereosound.co.jp", which appears to
      # have been copied from the StereoSound strategy.
      # NOTE(review): confirm this host against the URL pattern that routes
      # requests to this strategy.
      BASE_URL = "http://girlsnews.tv".freeze

      private

      # @return [Array<String>] one absolute URL per matched anchor, built by
      #   joining the anchor's href onto BASE_URL
      def parse
        page.css(CSS_SELECTORS.join(", ")).map do |a|
          File.join(BASE_URL, a["href"])
        end
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
module Ocawari
  module Strategy
    # Extracts image URLs from the photo album linked by a Google+ page.
    class GooglePlus < Parser
      # Matches a "u/<digits>/" segment, which is removed from the URI before
      # the page is fetched.
      USER_IDENTIFIER = /u\/\d+\//

      # Matches the first "/photos/<digits>/albums/<digits>" path in the HTML.
      ALBUM_PATH = /(\/photos\/\d+\/albums\/\d+)/

      # Fetches and parses the page at +uri+.
      #
      # @param uri [#to_s] address of the page; a "u/<digits>/" segment is
      #   stripped when present
      #
      # When the request raises OpenURI::HTTPError, @page is left as nil.
      def initialize(uri)
        @uri =
          if USER_IDENTIFIER.match?(uri.to_s)
            Addressable::URI.parse(uri.to_s.sub(USER_IDENTIFIER, ""))
          else
            uri
          end

        @page = Nokogiri::HTML(open(@uri).read)
      rescue OpenURI::HTTPError
        @page = nil
      end

      private

      # Downloads the album page referenced from the fetched page and returns
      # the src of every <img> on it, with a "=w<W>-h<H>" size suffix replaced
      # by "=s0".
      #
      # @return [Array<String>] image URLs; empty when the page could not be
      #   fetched or contains no album path
      def parse
        # initialize stores nil when the fetch failed.
        return [] if @page.nil?

        album_path = @page.to_html[ALBUM_PATH, 1]
        # File.join would raise TypeError on nil, so bail out early.
        return [] unless album_path

        album_url = File.join("https://plus.google.com", album_path)
        album_page = Nokogiri::HTML(open(album_url).read)

        # Skip <img> elements that have no src attribute.
        sources = album_page.css("img").map { |img| img["src"] }.compact
        sources.map { |src| src.sub(/=w\d+-h\d+/, "=s0") }
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
module Ocawari
  module Strategy
    # Returns the src of each full-size image inside a Hustlepress post body.
    class Hustlepress < Parser
      CSS_SELECTORS = [
        "div.post_content img.size-full"
      ]

      private

      # @return [Array] the src attribute of every node matched by
      #   CSS_SELECTORS, in document order
      def parse
        selector = CSS_SELECTORS.join(", ")
        page.css(selector).map { |node| node["src"] }
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
module Ocawari
  module Strategy
    # Builds image URLs from the zoom links of an Imgur post.
    class Imgur < Parser

      private

      # @return [Array<String>] "http:" followed by the href of each
      #   "div.post-images a.zoom" anchor
      def parse
        page.css("div.post-images a.zoom").map do |anchor|
          "http:#{anchor["href"]}"
        end
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
module Ocawari
  module Strategy
    # Extracts image URLs from the JSON state embedded in an Instagram post page.
    class Instagram < Parser
      # Text that identifies the <script> tag holding the embedded JSON.
      SHARED_DATA_MARKER = "window._sharedData".freeze

      # Fetches and parses the post page.
      #
      # @param uri [#query, #query=] URI object of the post; its query string
      #   is cleared in place when it carries a "taken-by=" parameter
      #
      # The earlier implementation looked for "?taken-by=" in uri.path, but the
      # path component of a URI never contains the query string, so the
      # parameter was never removed. The query component is inspected instead.
      #
      # When the request raises OpenURI::HTTPError, @page is left as nil.
      def initialize(uri)
        uri.query = nil if uri.query.to_s.include?("taken-by=")

        @page = Nokogiri::HTML(open(uri).read)
      rescue OpenURI::HTTPError
        @page = nil
      end

      private

      # Reads "window._sharedData" from the page and returns the image URLs of
      # the post.
      #
      # @return [Array<String>] one display_url per child when the post has
      #   "edge_sidecar_to_children" edges, otherwise the src of the last entry
      #   in "display_resources"; empty when the script tag or the
      #   "shortcode_media" node is missing
      def parse
        script_tag = page.css("script").find do |script|
          script.text.include?(SHARED_DATA_MARKER)
        end
        return [] unless script_tag

        # Strip the JavaScript assignment and the trailing semicolon so that
        # only the JSON document remains.
        raw = script_tag.text.
          sub("window._sharedData = ", "").
          sub(/;\s*\z/, "")

        root = JSON.parse(raw).
          dig("entry_data", "PostPage", 0, "graphql", "shortcode_media")
        return [] unless root

        edges = root.dig("edge_sidecar_to_children", "edges")
        if edges
          edges.map { |edge| edge.dig("node", "display_url") }
        else
          [root["display_resources"].last["src"]]
        end
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
module Ocawari
  module Strategy
    # Rewrites the eyecatch and body images of a Kaiyou article to URLs under
    # "/press/img/" on the image's own host.
    class Kaiyou < Parser

      private

      # @return [Array<String>] for every image found,
      #   "<scheme>://<host>/press/img/<file name>" built from its src
      def parse
        # at_css returns nil when the article has no eyecatch image, so nil
        # entries are dropped before src is read.
        all_images = ([header_image] + content_images.to_a).compact

        all_images.map do |img|
          uri = Addressable::URI.parse(img["src"])

          "#{uri.scheme}://#{uri.hostname}/press/img/#{uri.path.split("/").last}"
        end
      end

      # @return [Object, nil] the first "div.m-article-eyecatch img" node, or
      #   nil when none matches
      def header_image
        page.at_css("div.m-article-eyecatch img")
      end

      # @return [Object] the "div.m-article-main img.size-full" nodes
      def content_images
        page.css("div.m-article-main img.size-full")
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
module Ocawari
  module Strategy
    # Collects article images from a keyakizaka46.com page.
    class Keyakizaka46 < Parser
      # Fetches +uri+ with the Ocawari::WINDOWS_CHROME_USER_AGENT header and
      # parses the response.
      #
      # @param uri address of the page to fetch
      #
      # When the request raises OpenURI::HTTPError, @page is left as nil.
      def initialize(uri)
        @uri = uri
        headers = {"User-Agent" => Ocawari::WINDOWS_CHROME_USER_AGENT}
        html = open(uri, headers).read
        @page = Nokogiri::HTML(html)
      rescue OpenURI::HTTPError
        @page = nil
      end

      private

      # @return [Array<String>] the src of each "div.box-article img" joined
      #   onto "http://www.keyakizaka46.com"
      def parse
        sources = page.css("div.box-article img").map { |node| node["src"] }
        sources.map { |src| File.join("http://www.keyakizaka46.com", src) }
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
module Ocawari
  module Strategy
    # Collects the pictures of a LINE article and removes the markers
    # "-s." and a trailing "/small" from their addresses.
    class Line < Parser
      CSS_SELECTOR_HIERARCHY = [
        "div.article-body",
        "div.article-body-inner",
        "img.pict"
      ].join(" ")

      private

      # @return [Array] the rewritten src of every node matched by
      #   CSS_SELECTOR_HIERARCHY
      def parse
        page.css(CSS_SELECTOR_HIERARCHY).map do |node|
          strip_size_marker(node["src"])
        end
      end

      # Removes the first "-s." (replaced by ".") when the source contains it,
      # otherwise removes the first "/small" when the source ends with it.
      # Any other value, including nil, is returned untouched.
      def strip_size_marker(source)
        if /-s\./.match?(source)
          source.sub("-s.", ".")
        elsif /\/small$/.match?(source)
          source.sub("/small", "")
        else
          source
        end
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
module Ocawari
  module Strategy
    # Derives the URL of every photo of a Mantan Web article from the URL of
    # its main image and the photo count shown on the page.
    class MantanWeb < Parser
      # Not referenced by #parse; kept so the constant remains defined.
      CSS_SELECTORS = [
        "ul.newsbody__thumblist li.newsbody__thumb img"
      ].freeze

      private

      # Replaces "001_size6" in the main image src with "<NNN>_size10" for each
      # index from 1 up to the number read from "span.newsbody__photo-num".
      #
      # Indexes are zero-padded to at least three digits. Indexes of 1000 and
      # above, which used to produce nil entries, are now formatted with as
      # many digits as they need.
      #
      # @return [Array<String>] one URL per photo; empty when the count node,
      #   the main image node or its src is missing
      def parse
        count_node = page.at_css("span.newsbody__photo-num")
        main_node = page.at_css("div.newsbody__img img")
        main_image = main_node && main_node["src"]
        return [] unless count_node && main_image

        (1..count_node.text.to_i).map do |i|
          main_image.sub("001_size6", format("%03d_size10", i))
        end
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
module Ocawari
  module Strategy
    # Returns the src of the images matched by CSS_SELECTORS on a page handled
    # by the MensFashion strategy.
    class MensFashion < Parser
      CSS_SELECTORS = [
        "p img.size-full",
        "img.image_ll",
        "img.image_ls"
      ]

      private

      # @return [Array] the src attribute of every matched node
      def parse
        selector = CSS_SELECTORS.join(", ")
        page.css(selector).map { |node| node["src"] }
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
module Ocawari
  module Strategy
    # Collects the "outputthumb" images of a ModelPress page and replaces
    # their query string with fixed width and quality parameters.
    class ModelPress < Parser
      CSS_SELECTORS = [
        "div#body-top img.outputthumb",
        "article.mdpr-article img.outputthumb"
      ]

      private

      # @return [Array<String>] each image src up to its first "?", followed
      #   by "?width=6000&quality=100"
      def parse
        page.css(CSS_SELECTORS.join(", ")).map do |node|
          base = node["src"].split("?").first

          # Per the original author: width is 6000 to make Fastly return the
          # largest image possible.
          "#{base}?width=6000&quality=100"
        end
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
module Ocawari
  module Strategy
    # Collects the images of a blog.nanabunnonijyuuni.com entry.
    class NanaBunNoNijuuni < Parser
      CSS_SELECTORS = [
        "div.blog_main img"
      ]

      private

      # @return [Array<String>] the src of each matched image joined onto
      #   "http://blog.nanabunnonijyuuni.com"
      def parse
        sources = page.css(CSS_SELECTORS.join(",")).map { |node| node["src"] }
        sources.map { |src| File.join("http://blog.nanabunnonijyuuni.com", src) }
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
module Ocawari
  module Strategy
    # Returns the image attached to a post on a page handled by the NanaGoGo
    # strategy.
    class NanaGoGo < Parser

      private

      # Looks up the first <img> whose alt text is "投稿画像" ("posted image")
      # and returns its data-src attribute.
      #
      # A previous full scan of every <img> stored its result in a local that
      # was never read; it has been removed.
      #
      # @return [Array<String>] a single-element array, or an empty array when
      #   no such image exists or it has no data-src
      def parse
        target_img = page.at("img[alt='投稿画像']")
        return [] unless target_img

        [target_img["data-src"]].compact
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
module Ocawari
  module Strategy
    # Collects the thumbnails of a Natalie article and swaps their
    # "fit_<W>x<H>" segment for "fixw_730_hq".
    class Natalie < Parser
      CSS_SELECTORS = [
        "div.NA_articleUnit ul.NA_imageList span.NA_thumb"
      ]

      private

      # @return [Array<String>] the rewritten data-bg attribute of every
      #   matched node
      def parse
        backgrounds = page.css(CSS_SELECTORS.join(", ")).map { |thumb| thumb["data-bg"] }
        backgrounds.map { |url| url.sub(/fit_\d+x\d+/, "fixw_730_hq") }
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
module Ocawari
  module Strategy
    # Collects the slider images of a page handled by the NewsDwango strategy
    # and drops the "lg_" prefix from their file names.
    class NewsDwango < Parser
      CSS_SELECTORS = [
        "#js-fancy-slider-sub img"
      ]

      private

      # @return [Array<String>] each matched src with its first "/lg_"
      #   replaced by "/"
      def parse
        sources = page.css(CSS_SELECTORS.join(",")).map { |node| node["src"] }
        sources.map { |src| src.sub("/lg_", "/") }
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
module Ocawari
  module Strategy
    # Extracts photo URLs from the inline background-image styles of a
    # Nikkan Sports article.
    class NikkanSports < Parser
      CSS_SELECTORS = [
        "article#columnMain div.column-photo-area img"
      ].freeze

      # Root that extracted paths are joined onto. The scheme is included so
      # the result is an absolute URL, like the other strategies that join a
      # path onto a host; previously the bare host "www.nikkansports.com" was
      # used.
      BASE_URL = "http://www.nikkansports.com".freeze

      private

      # Takes the style attribute of every matched node, strips the leading
      # "background-image: url(" and everything from ");" onward, and joins the
      # remainder onto BASE_URL.
      #
      # @return [Array<String>] absolute image URLs; nodes without a style
      #   attribute are skipped
      def parse
        styles = page.css(CSS_SELECTORS.join(", ")).map { |image| image["style"] }

        styles.compact.map do |style|
          path = style.
            sub("background-image: url(", "").
            sub(/\);.*$/, "")

          File.join(BASE_URL, path)
        end
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
module Ocawari
  module Strategy
    # Strategy that never yields any result.
    class NoMatch
      # Accepts a URI and ignores it.
      #
      # @param uri unused
      def initialize(uri); end

      # @return [Array] always a new empty array
      def execute
        []
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
module Ocawari
  module Strategy
    # Returns the header and in-text images of a page handled by the
    # OkMusicJP strategy.
    class OkMusicJP < Parser
      CSS_SELECTORS = [
        "div#page_frame div.center-page_box div.head-img_box img",
        "div#page_frame div.center-page_box img.main-text_image"
      ]

      private

      # @return [Array] the src attribute of every matched node
      def parse
        selector = CSS_SELECTORS.join(", ")
        page.css(selector).map { |node| node["src"] }
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
module Ocawari
  module Strategy
    # Collects images from a Sirabee article, preferring its gallery when one
    # is present.
    class Sirabee < Parser
      LARGEST_KNOWN_WIDTH = 768
      LARGEST_KNOWN_HEIGHT = 512

      private

      # When the page has gallery images, returns their src with the first
      # "-<3 digits>x<3 digits>" segment replaced by
      # "-<LARGEST_KNOWN_WIDTH>x<LARGEST_KNOWN_HEIGHT>". Otherwise returns the
      # src of the "size-large" images in the article body unchanged.
      #
      # @return [Array]
      def parse
        gallery = page.css("div.entryGallery div.entryGallery-item img")

        unless gallery.any?
          body_images = page.css("article.entryContent section.entryContentBody img.size-large")
          return body_images.map { |node| node["src"] }
        end

        largest = "-#{LARGEST_KNOWN_WIDTH}x#{LARGEST_KNOWN_HEIGHT}"
        gallery.map { |node| node["src"].sub(/-\d{3}x\d{3}/, largest) }
      end
    end
  end
end