ocawari 0.9.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.ruby-version +1 -0
- data/.travis.yml +6 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +21 -0
- data/README.md +95 -0
- data/Rakefile +13 -0
- data/bin/console +11 -0
- data/bin/setup +8 -0
- data/exe/oca +51 -0
- data/lib/ocawari.rb +72 -0
- data/lib/ocawari/parser.rb +20 -0
- data/lib/ocawari/strategy/ameblo.rb +51 -0
- data/lib/ocawari/strategy/entame_clip.rb +18 -0
- data/lib/ocawari/strategy/gendai_business.rb +37 -0
- data/lib/ocawari/strategy/girls_news.rb +19 -0
- data/lib/ocawari/strategy/google_plus.rb +30 -0
- data/lib/ocawari/strategy/hustlepress.rb +18 -0
- data/lib/ocawari/strategy/imgur.rb +14 -0
- data/lib/ocawari/strategy/instagram.rb +37 -0
- data/lib/ocawari/strategy/kaiyou.rb +25 -0
- data/lib/ocawari/strategy/keyakizaka46.rb +23 -0
- data/lib/ocawari/strategy/line.rb +31 -0
- data/lib/ocawari/strategy/mantan_web.rb +27 -0
- data/lib/ocawari/strategy/mens_fashion.rb +20 -0
- data/lib/ocawari/strategy/modelpress.rb +25 -0
- data/lib/ocawari/strategy/nana_bun_no_nijuuni.rb +18 -0
- data/lib/ocawari/strategy/nana_go_go.rb +18 -0
- data/lib/ocawari/strategy/natalie.rb +18 -0
- data/lib/ocawari/strategy/news_dwango.rb +18 -0
- data/lib/ocawari/strategy/nikkan_sports.rb +22 -0
- data/lib/ocawari/strategy/no_match.rb +12 -0
- data/lib/ocawari/strategy/okmusicjp.rb +19 -0
- data/lib/ocawari/strategy/sirabee.rb +29 -0
- data/lib/ocawari/strategy/stereo_sound.rb +19 -0
- data/lib/ocawari/strategy/tokyo_idol_net.rb +14 -0
- data/lib/ocawari/strategy/tumblr.rb +58 -0
- data/lib/ocawari/strategy/tv_tokyo.rb +18 -0
- data/lib/ocawari/strategy/twitter.rb +29 -0
- data/lib/ocawari/strategy_delegator.rb +52 -0
- data/lib/ocawari/version.rb +3 -0
- data/ocawari.gemspec +45 -0
- metadata +342 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 5606e0448e69d81a691fd0b8309e92a7d336c956993b4508ac32a099f8aa0a25
|
4
|
+
data.tar.gz: 4ed218d1c9f3b84e1df7afbbf6b2d6c5c5f118efa3dc23fbfe4ab0e5eaa8e9dd
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 1de3b054efb3e8911913b1e076f97d8ff2698d153bbb206f3dc380217f7136b233eedf68be40cf7007dc9826b7e649b80da6ca5f7c544ee6229c850a4ccc4f6a
|
7
|
+
data.tar.gz: 046f90f9de48adc5b4cbb491f445e2a26bfb6dbbeb50884f22ddb7f2e4038280e570d7654e8c3b5c801cf61945bc8b1ba018a8825853c308d4940ee589b4a7ee
|
data/.gitignore
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.5.0
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2016 Kenneth Uy
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,95 @@
|
|
1
|
+
# Ocawari [![Build Status](https://travis-ci.org/NewSchoolKaidan/ocawari.svg?branch=master)](https://travis-ci.org/NewSchoolKaidan/ocawari)
|
2
|
+
|
3
|
+
Fetches the images from supported domains, primarily where images of [Japanese idols](http://newschoolkaidan.com/daes-idol-101/) can be found such as Twitter or Instagram and less commonly social media sites like 755 and Google Plus. Since this was developed for personal scripting use, features are added sporadically and may not be suitable for production.
|
4
|
+
|
5
|
+
## Requirements
|
6
|
+
|
7
|
+
Ruby 2.5 or higher
|
8
|
+
|
9
|
+
## Installation
|
10
|
+
|
11
|
+
Add this line to your application's Gemfile:
|
12
|
+
|
13
|
+
```ruby
|
14
|
+
gem 'ocawari'
|
15
|
+
```
|
16
|
+
|
17
|
+
And then execute:
|
18
|
+
|
19
|
+
$ bundle
|
20
|
+
|
21
|
+
Or install it yourself as:
|
22
|
+
|
23
|
+
$ gem install ocawari
|
24
|
+
|
25
|
+
## Usage
|
26
|
+
|
27
|
+
Ocawari exposes a single public class method, `Ocawari.parse`, on the top-level `Ocawari` module. It accepts either an array of urls or a single url string. If it receives no valid input (empty Array, empty String, nil, invalid string url, failed network requests), it returns an empty Array.
|
28
|
+
|
29
|
+
|
30
|
+
```ruby
|
31
|
+
require "ocawari"
|
32
|
+
|
33
|
+
# With an array of urls as argument
|
34
|
+
urls = [
|
35
|
+
"https://twitter.com/kayoyon213/status/853480124875919360",
|
36
|
+
"https://plus.google.com/105835152133357364264/posts/dc3mX2seBbP",
|
37
|
+
"https://www.instagram.com/p/BTbNyYFlUqK/?taken-by=katorena_ktrn&hl=ja",
|
38
|
+
]
|
39
|
+
|
40
|
+
Ocawari.parse(urls)
|
41
|
+
|
42
|
+
# => [
|
43
|
+
# "https://scontent.cdninstagram.com/t51.2885-15/e35/18096115_259830451156318_7337145356775325696_n.jpg",
|
44
|
+
# "https://pbs.twimg.com/media/C9grpPYVwAAf6tO.jpg:large",
|
45
|
+
# "https://pbs.twimg.com/media/C9grpPaUwAAUMa9.jpg:large",
|
46
|
+
# "https://lh3.googleusercontent.com/-Sbo9d0O1Y-Y/WQs4L1DTytI/AAAAAAAKPJ8/1lt7eO9qQVkJkmki95CPkN3kp9RSFiutwCHM/s0/04%2B-%2B1"
|
47
|
+
# ]
|
48
|
+
|
49
|
+
# With single string as an argument
|
50
|
+
Ocawari.parse("http://lineblog.me/2zicon/archives/1062410837.html")
|
51
|
+
|
52
|
+
# => [
|
53
|
+
# "https://obs.line-scdn.net/0m0ed7108ce84d280e363e6015367c677c327b1f3c64312d1d11035f5d2a5a4055307735714a765e0f7c27533345581e3e66635a25731668003539326f64777b1424646b252c152f55622e6f734c603c3d223f",
|
54
|
+
# "https://obs.line-scdn.net/0m0ed71085e84d280e363e6015367c677c327b290b5e304a126424787e4b287a4501485775724368697a7a37685a622f2047675a25731668003539316665777b1424616f732c152f55632e6f734c603c3d2360",
|
55
|
+
# "https://obs.line-scdn.net/0m0ed710bde84d280e363e6015367c677c327b090f7c2b2e1d713c6f654f73407317604b4549523a794358704e525a213565515a2573166800353931676b777b14246b69747f167e08332e6f734c603c3d2469",
|
56
|
+
# "https://obs.line-scdn.net/0m0ed710b6e84d280e363e6015367c677c327b1f2f74026c0b742767502e6f6f38206870357e4a6d0e6c673f525a483a3932645a2573166800353931646b777b1424656f787a152f55602e6f734c603c3d2438",
|
57
|
+
# "https://obs.line-scdn.net/0m0ed710afe84d280e363e6015367c677c327b1f10584b7b3e6f216b44216f68542c5351324350485d7d584873467d213a62545a25731668003539316568777b1424356b257d152f55612e6f734c603c3d256a"
|
58
|
+
# ]
|
59
|
+
|
60
|
+
# With invalid arguments
|
61
|
+
Ocawari.parse("")
|
62
|
+
# => []
|
63
|
+
Ocawari.parse([])
|
64
|
+
# => []
|
65
|
+
Ocawari.parse(nil)
|
66
|
+
# => []
|
67
|
+
Ocawari.parse("ASAMINはおれの嫁")
|
68
|
+
# => []
|
69
|
+
```
|
70
|
+
|
71
|
+
Ocawari also comes with a command line interface via the `oca` command. It takes any number of arguments and then outputs the urls to the images to STDOUT.
|
72
|
+
|
73
|
+
```
|
74
|
+
oca http://ameblo.jp/saho-iwatate/entry-12270820699.html
|
75
|
+
|
76
|
+
// Outputs the following
|
77
|
+
https://stat.ameba.jp/user_images/20170502/00/saho-iwatate/09/1f/j/o0480036013926951597.jpg
|
78
|
+
https://stat.ameba.jp/user_images/20170502/00/saho-iwatate/b2/8c/j/o0480036013926951622.jpg
|
79
|
+
https://stat.ameba.jp/user_images/20170502/00/saho-iwatate/7b/68/j/o0480036013926951627.jpg
|
80
|
+
https://stat.ameba.jp/user_images/20170502/00/saho-iwatate/c6/7a/j/o0480036013926951642.jpg
|
81
|
+
https://stat.ameba.jp/user_images/20170502/00/saho-iwatate/46/9f/j/o0480036013926951647.jpg
|
82
|
+
https://stat.ameba.jp/user_images/20170502/00/saho-iwatate/4d/36/j/o0480036013926951655.jpg
|
83
|
+
https://stat.ameba.jp/user_images/20170502/00/saho-iwatate/72/c7/j/o0480036013926951662.jpg
|
84
|
+
```
|
85
|
+
|
86
|
+
Because of this, I usually run the following command, or something similar, at least several times a day:
|
87
|
+
|
88
|
+
```
|
89
|
+
oca https://twitter.com/CP_asami_ist/status/859057692140224514 | xargs wget -P $HOME/Pictures/渡辺亜沙美
|
90
|
+
```
|
91
|
+
|
92
|
+
|
93
|
+
## License
|
94
|
+
|
95
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "ocawari"
|
5
|
+
require "pry"
|
6
|
+
|
7
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
8
|
+
# with your gem easier. You can also use a different console, if you like.
|
9
|
+
|
10
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
11
|
+
Pry.start
|
data/bin/setup
ADDED
data/exe/oca
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "methadone"
|
4
|
+
require "ocawari"
|
5
|
+
|
6
|
+
include Methadone::Main
|
7
|
+
include Methadone::CLILogging
|
8
|
+
|
9
|
+
main do
  # Post urls may come from an optional input file (one url per line) in
  # addition to the ones given directly on the command line.
  input_path = options.fetch(:input, false)
  inputfile_post_urls = input_path ? File.readlines(input_path).map(&:strip) : []

  images = Ocawari.parse(inputfile_post_urls + ARGV)

  # Either save every image to disk or just print its url.
  if options.fetch(:download, false)
    download_images(images)
    puts "Images have finished downloading"
  else
    images.each { |url| puts url }
  end
end
|
25
|
+
|
26
|
+
# Downloads every image url into the current working directory using five
# worker threads.
#
# Each file is named after the last "/"-separated segment of its url. A url
# that answers with an HTTP error is reported with a warning and skipped, so
# one broken link does not abort the remaining downloads.
#
# @param images [Array<String>] image urls to download
# @return [Array<Thread>] the five worker threads, already joined
def download_images(images)
  work_queue = Queue.new
  images.each { |url| work_queue << url }
  # A closed queue makes #pop return nil once it is drained, so the workers
  # stop cleanly instead of relying on a rescued ThreadError.
  work_queue.close

  (0..4).map do
    Thread.new do
      while (url = work_queue.pop)
        filename = url.split("/").last
        begin
          # URI.open is required on Ruby 3.0+, where Kernel#open no longer
          # handles urls; the block form also closes the remote handle.
          # The body is fetched before the file is created so that a failed
          # download does not leave an empty file behind.
          data = URI.open(url, &:read)
          File.binwrite(filename, data)
        rescue OpenURI::HTTPError => e
          warn "Skipping #{url}: #{e.message}"
        end
      end
    end
  end.map(&:join)
end
|
44
|
+
|
45
|
+
# Command-line switches read back through `options` in the main block.
on "-d", "--download", "Downloads each image that is sent to STDOUT via Ocawari after fetching the image urls"
on "-i INPUT", "--input", "Add a file which contains a list of urls with a single url on each line"

# Descriptive text and version reported by the CLI.
description "Ocawari CLI which outputs the images posted by your favorite idols to STDOUT"
version Ocawari::VERSION

# Run the CLI.
go!
|
data/lib/ocawari.rb
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
require "json"
|
2
|
+
require "thread"
|
3
|
+
require "open-uri"
|
4
|
+
require "nokogiri"
|
5
|
+
require "addressable/uri"
|
6
|
+
|
7
|
+
require "ocawari/version"
|
8
|
+
require "ocawari/parser"
|
9
|
+
require "ocawari/strategy_delegator"
|
10
|
+
|
11
|
+
# Top-level entry point of the gem: turns post urls into the image urls that
# the matching site strategy extracts from them.
module Ocawari

  WINDOWS_CHROME_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36".freeze
  WINDOWS_EDGE_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Safari/537.36 Edge/13.10586".freeze

  # Upper bound on the number of strategies executed concurrently by .parse.
  MAX_WORKERS = 5

  # Collects image urls for one post url or for a list of post urls.
  #
  # @param args [Array, String, Object] a list of urls, a single url, or
  #   anything else (which yields an empty result)
  # @return [Array] image urls. For an Array argument the results are
  #   gathered as workers finish, so their order is not the input order.
  def self.parse(args)
    case args
    when Array
      parse_many(args)
    when String
      return [] if args.empty?

      uri = prepare_uri(args)
      StrategyDelegator.identify(uri.to_s).new(uri).execute
    else
      []
    end
  end

  # Parses +url+ and normalizes it, assuming "http://" when no scheme is
  # given.
  #
  # NOTE: the original source placed this under a bare `private`, which has
  # no effect on `def self.` methods; it therefore stays public so existing
  # callers keep working.
  #
  # @param url [String] the url as supplied by the caller
  # @return [Addressable::URI] the normalized uri
  def self.prepare_uri(url)
    parsed = Addressable::URI.parse(url)
    parsed = Addressable::URI.parse("http://#{parsed}") if parsed.scheme.nil?
    parsed.normalize
  end

  # Runs the strategy for every usable url on a small pool of threads.
  #
  # nil and empty entries are skipped up front: nil would otherwise crash in
  # prepare_uri, and neither can ever yield an image.
  #
  # @param urls [Array] candidate urls
  # @return [Array] all image urls found, without nil entries
  def self.parse_many(urls)
    work_queue = Queue.new

    urls.each do |url|
      next if url.nil? || (url.respond_to?(:empty?) && url.empty?)

      uri = prepare_uri(url)
      work_queue << [StrategyDelegator.identify(uri.to_s), uri]
    end
    return [] if work_queue.empty?

    # A closed queue makes #pop return nil once drained, which ends each
    # worker loop without using a rescued ThreadError as control flow.
    work_queue.close

    mutex = Mutex.new
    collected_images = []
    worker_count = [work_queue.size, MAX_WORKERS].min

    Array.new(worker_count) do
      Thread.new do
        while (taskset = work_queue.pop)
          strategy, uri = taskset
          images = strategy.new(uri).execute

          # synchronize releases the lock even if the block raises; Array()
          # tolerates a strategy that returns nil.
          mutex.synchronize { collected_images.concat(Array(images)) }
        end
      end
    end.each(&:join)

    collected_images.compact
  end
  private_class_method :parse_many
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require "open-uri"
require "socket"
require "timeout"

# Base class for the site strategies: fetches the page at construction time
# and hands it to the subclass's private #parse.
class Parser
  # Failures that mean "the page could not be fetched". They are turned into
  # an empty result rather than an exception.
  FETCH_ERRORS = [OpenURI::HTTPError, SocketError, SystemCallError, Timeout::Error].freeze

  # Downloads and parses the page behind +uri+. When the fetch fails, the
  # page is left nil and #execute returns [].
  #
  # @param uri [#to_s] address of the post to scrape
  def initialize(uri)
    @uri = uri
    # URI.open is required on Ruby 3.0+, where Kernel#open no longer handles
    # urls; the block form closes the remote handle after reading.
    @page = Nokogiri::HTML(URI.open(uri.to_s, &:read))
  rescue *FETCH_ERRORS
    @page = nil
  end

  # @return [Array] whatever the subclass's #parse extracts, or [] when the
  #   page could not be fetched
  def execute
    return [] if page.nil?

    parse
  end

  private

  attr_reader :page, :uri
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Extracts the full-size images of an Ameblo blog entry, preferring the
    # JSON embedded in the page ("window.INIT_DATA") and falling back to
    # scraping the rendered HTML.
    class Ameblo < Parser

      CSS_HIERARCHY_SELECTORS = [
        "article a.detailOn img",
        "div.subContentsInner a.detailOn img",
        "div#entryBody.articleText img"
      ].freeze

      # Matches the "/t<digits>_" file-name prefix that #full_size rewrites
      # to "/o".
      THUMBNAIL_PREFIX = %r{/t\d+_}.freeze

      private

      # @return [Array<String>] image urls found in the entry
      def parse
        script_tag = page.css("script").find { |script| script.text.include?("window.INIT_DATA") }
        return images_from_init_data(script_tag.text) if script_tag

        images_from_markup
      rescue JSON::ParserError
        # The embedded payload could not be parsed; the HTML is still usable.
        images_from_markup
      end

      # Reads the entry body out of the INIT_DATA JSON and collects its
      # user-uploaded images.
      def images_from_init_data(script_text)
        raw_json = script_text.split(";window")[0].sub("window.INIT_DATA=", "")
        entry_html = JSON.parse(raw_json).dig("entryState", "entryMap", entry_id, "entry_text")

        # to_s guards both a missing entry_text and <img> tags without src.
        sources = Nokogiri::HTML(entry_html.to_s).css("img").map { |img| img["src"].to_s }
        uploads = sources.select { |src| src.include?("/user_images/") }
        uploads.map { |src| full_size(src).sub(/\?caw=800/, "") }
      end

      # Collects .jpg/.png images from the known Ameblo page layouts.
      def images_from_markup
        sources = page.css(CSS_HIERARCHY_SELECTORS.join(", ")).map { |node| node["src"].to_s }
        sources.select { |src| /\.jpg|\.png/i.match?(src) }.map { |src| full_size(src) }
      end

      # Rewrites the first "/t<digits>_" occurrence in +src+ to "/o".
      def full_size(src)
        src.sub(THUMBNAIL_PREFIX, "/o")
      end

      # The entry id is the file name of the url without "entry-" and ".html".
      def entry_id
        uri.basename.sub("entry-", "").sub(".html", "")
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy for Entame Clip articles: returns the link target of every
    # anchor matched by CSS_SELECTORS.
    class EntameClip < Parser

      private

      CSS_SELECTORS = [
        "div#the-content div.hover-image a"
      ]

      # @return [Array] the href attribute of each matching anchor
      def parse
        selector = CSS_SELECTORS.join(",")
        page.css(selector).map { |anchor| anchor["href"] }
      end
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Ocawari
  module Strategy
    # Strategy for Gendai Business articles, which may span several pages:
    # collects the header image of the first page plus every "main-image" of
    # each page.
    class GendaiBusiness < Parser

      # Host that the site-relative image paths are joined onto.
      BASE_URL = "https://gendai.ismedia.jp".freeze

      private

      # @return [Array<String>] absolute image urls across all pages
      def parse
        (1..pagination_limit).reduce([]) do |acc, pagination|
          document = fetch_page(pagination)
          # A page that cannot be fetched is skipped instead of aborting.
          next acc if document.nil?

          # Add header image
          if pagination == 1
            header = header_image(document)
            acc << header if header
          end

          document.css("img.main-image").each do |mainimage|
            src = mainimage["src"]
            acc << File.join(BASE_URL, src) if src
          end

          acc
        end
      end

      # Fetches and parses one page of the article.
      #
      # @return [Nokogiri::HTML::Document, nil] nil on an HTTP error
      def fetch_page(number)
        # URI.open is required on Ruby 3.0+, where Kernel#open no longer
        # handles urls; the block form closes the handle once parsed.
        URI.open("#{uri}?page=#{number}") { |io| Nokogiri::HTML(io) }
      rescue OpenURI::HTTPError
        nil
      end

      # Extracts the header image url from the inline background-image style.
      #
      # @return [String, nil] nil when the page has no header image
      def header_image(document)
        node = document.at("div.articleFirstImage")
        style = node && node["style"]
        return if style.nil?

        resource = style.
          sub("background-image:url('", "").
          sub("');", "")

        File.join(BASE_URL, resource)
      end

      # Highest page number shown in the pagination widget; 1 when the
      # article has no pagination at all.
      def pagination_limit
        numbers = page.css("div.blockContainer div.pagination li.number").
          map { |item| item.text.strip.to_i }

        [1, *numbers].max
      end
    end
  end
end
|