collect_twitter_media 1.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.coveralls.yml +1 -0
- data/.gitignore +11 -0
- data/.rake_tasks~ +7 -0
- data/.rspec +3 -0
- data/.travis.yml +5 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +90 -0
- data/LICENSE +21 -0
- data/README.md +126 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/circle.yml +68 -0
- data/collect_twitter_media.gemspec +38 -0
- data/lib/collect_twitter_media.rb +200 -0
- data/lib/collect_twitter_media/file_operation.rb +24 -0
- data/lib/collect_twitter_media/version.rb +3 -0
- data/the_csv_file.png +0 -0
- data/the_result_directory.jpg +0 -0
- metadata +120 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: c6b70cef56555211e910daa5d8b8fb352229c96ac575b74670a92c4eb8ace194
|
4
|
+
data.tar.gz: 3dc5335a8b1eea26c012058cec2c4844873be0c78dd0f31dc99089fbc14a41f0
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 632c049587ab4fa83b711e6a2c7e2984cf2333cdf304fc9a795e09f30f7cdbf9bc9028afb4c408d300fa4ed032bb5105668927dd55b593f44ece8c7a8215d201
|
7
|
+
data.tar.gz: 13d26ca2af15d809a284232dfcaad99fa677eabf74d9283baae4f1b93b37996171059edc82325c5a1591e6f9591e847864e637d077608f5162963ec553db944f
|
data/.coveralls.yml
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
# .coveralls.yml
|
data/.gitignore
ADDED
data/.rake_tasks~
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
source "https://rubygems.org"

# Lets `github: "owner/repo"` shorthand resolve to an HTTPS clone URL.
git_source(:github) do |repo_name|
  "https://github.com/#{repo_name}"
end

# Runtime and development dependencies are declared in collect_twitter_media.gemspec.
gemspec

# Coverage reporters; neither is auto-required when the bundle is loaded.
gem 'coveralls', require: false
gem 'codecov', require: false, group: :test
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
collect_twitter_media (1.0.0)
|
5
|
+
twitter
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: https://rubygems.org/
|
9
|
+
specs:
|
10
|
+
addressable (2.5.2)
|
11
|
+
public_suffix (>= 2.0.2, < 4.0)
|
12
|
+
buftok (0.2.0)
|
13
|
+
coveralls (0.8.21)
|
14
|
+
json (>= 1.8, < 3)
|
15
|
+
simplecov (~> 0.14.1)
|
16
|
+
term-ansicolor (~> 1.3)
|
17
|
+
thor (~> 0.19.4)
|
18
|
+
tins (~> 1.6)
|
19
|
+
diff-lcs (1.3)
|
20
|
+
docile (1.1.5)
|
21
|
+
domain_name (0.5.20170404)
|
22
|
+
unf (>= 0.0.5, < 1.0.0)
|
23
|
+
equalizer (0.0.11)
|
24
|
+
http (3.0.0)
|
25
|
+
addressable (~> 2.3)
|
26
|
+
http-cookie (~> 1.0)
|
27
|
+
http-form_data (>= 2.0.0.pre.pre2, < 3)
|
28
|
+
http_parser.rb (~> 0.6.0)
|
29
|
+
http-cookie (1.0.3)
|
30
|
+
domain_name (~> 0.5)
|
31
|
+
http-form_data (2.0.0)
|
32
|
+
http_parser.rb (0.6.0)
|
33
|
+
json (2.1.0)
|
34
|
+
memoizable (0.4.2)
|
35
|
+
thread_safe (~> 0.3, >= 0.3.1)
|
36
|
+
multipart-post (2.0.0)
|
37
|
+
naught (1.1.0)
|
38
|
+
public_suffix (3.0.1)
|
39
|
+
rake (10.5.0)
|
40
|
+
rspec (3.7.0)
|
41
|
+
rspec-core (~> 3.7.0)
|
42
|
+
rspec-expectations (~> 3.7.0)
|
43
|
+
rspec-mocks (~> 3.7.0)
|
44
|
+
rspec-core (3.7.0)
|
45
|
+
rspec-support (~> 3.7.0)
|
46
|
+
rspec-expectations (3.7.0)
|
47
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
48
|
+
rspec-support (~> 3.7.0)
|
49
|
+
rspec-mocks (3.7.0)
|
50
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
51
|
+
rspec-support (~> 3.7.0)
|
52
|
+
rspec-support (3.7.0)
|
53
|
+
simple_oauth (0.3.1)
|
54
|
+
simplecov (0.14.1)
|
55
|
+
docile (~> 1.1.0)
|
56
|
+
json (>= 1.8, < 3)
|
57
|
+
simplecov-html (~> 0.10.0)
|
58
|
+
simplecov-html (0.10.2)
|
59
|
+
term-ansicolor (1.6.0)
|
60
|
+
tins (~> 1.0)
|
61
|
+
thor (0.19.4)
|
62
|
+
thread_safe (0.3.6)
|
63
|
+
tins (1.15.1)
|
64
|
+
twitter (6.2.0)
|
65
|
+
addressable (~> 2.3)
|
66
|
+
buftok (~> 0.2.0)
|
67
|
+
equalizer (~> 0.0.11)
|
68
|
+
http (~> 3.0)
|
69
|
+
http-form_data (~> 2.0)
|
70
|
+
http_parser.rb (~> 0.6.0)
|
71
|
+
memoizable (~> 0.4.0)
|
72
|
+
multipart-post (~> 2.0)
|
73
|
+
naught (~> 1.0)
|
74
|
+
simple_oauth (~> 0.3.0)
|
75
|
+
unf (0.1.4)
|
76
|
+
unf_ext
|
77
|
+
unf_ext (0.0.7.4)
|
78
|
+
|
79
|
+
PLATFORMS
|
80
|
+
ruby
|
81
|
+
|
82
|
+
DEPENDENCIES
|
83
|
+
bundler (~> 1.16)
|
84
|
+
collect_twitter_media!
|
85
|
+
coveralls
|
86
|
+
rake (~> 10.0)
|
87
|
+
rspec (~> 3.0)
|
88
|
+
|
89
|
+
BUNDLED WITH
|
90
|
+
1.16.0
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2017 Osamu Takiya
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,126 @@
|
|
1
|
+
[![CircleCI](https://circleci.com/gh/corselia/collect_twitter_media/tree/master.svg?style=svg)](https://circleci.com/gh/corselia/collect_twitter_media/tree/master) [![codecov](https://codecov.io/gh/corselia/collect_twitter_media/branch/master/graph/badge.svg)](https://codecov.io/gh/corselia/collect_twitter_media) [![Coverage Status](https://coveralls.io/repos/github/corselia/collect_twitter_media/badge.svg)](https://coveralls.io/github/corselia/collect_twitter_media)
|
2
|
+
|
3
|
+
# Overview
|
4
|
+
- You can collect media files from Twitter (sorry, only image files for now; gif and mp4 are not supported yet)
|
5
|
+
- The source account is your own (the media files are collected from your home timeline)
|
6
|
+
|
7
|
+
# Required
|
8
|
+
- `wget` command
|
9
|
+
|
10
|
+
# Installation
|
11
|
+
```sh
|
12
|
+
$ gem install collect_twitter_media
|
13
|
+
```
|
14
|
+
|
15
|
+
# Usage
|
16
|
+
|
17
|
+
#### 1. require gem
|
18
|
+
```ruby
|
19
|
+
require 'collect_twitter_media'
|
20
|
+
```
|
21
|
+
|
22
|
+
#### 2. set your Twitter API token
|
23
|
+
- set the Twitter API token
|
24
|
+
|
25
|
+
```ruby
|
26
|
+
CollectTwitterMedia.consumer_key('YOUR_CONSUMER_KEY')
|
27
|
+
CollectTwitterMedia.consumer_secret('YOUR_CONSUMER_SECRET')
|
28
|
+
CollectTwitterMedia.access_token('YOUR_ACCESS_TOKEN')
|
29
|
+
CollectTwitterMedia.access_token_secret('YOUR_ACCESS_TOKEN_SECRET')
|
30
|
+
```
|
31
|
+
|
32
|
+
#### 3. 🎉exec `save` method🎉
|
33
|
+
- the first argument is the name of the directory in which the collected files are saved
|
34
|
+
|
35
|
+
```ruby
|
36
|
+
CollectTwitterMedia.save('media_collection')
|
37
|
+
```
|
38
|
+
|
39
|
+
#### 4. 🎆you have collected the media files and the csv file!🎆
|
40
|
+
- the media files have been collected in the directory you specified
|
41
|
+
- the format of filename has 3 parts
|
42
|
+
- `@twitter_screen_name`
|
43
|
+
- `tweet_id`
|
44
|
+
- `media_id`
|
45
|
+
- the example of filename
|
46
|
+
- `@dhh_934687159870177697_dPi3GGKVoAERExB.jpg`
|
47
|
+
- `@tenderlove_924694994511770368_IPi5IX-UEAAoL4v.jpg`
|
48
|
+
- the csv file has been created in the directory you specified
|
49
|
+
- the columns of this csv file
|
50
|
+
- `tweet_id`
|
51
|
+
- `screen_name`
|
52
|
+
- `original_filename`
|
53
|
+
- `save_filename`
|
54
|
+
- `uri`
|
55
|
+
- `created_at`
|
56
|
+
- the filename of this csv file has 2 parts
|
57
|
+
- `image_from_twitter`
|
58
|
+
- `%Y%m%d_%H%M%S`
|
59
|
+
- the example of filename
|
60
|
+
- `image_from_twitter_20171126_171717.csv`
|
61
|
+
- `image_from_twitter_20171224_210000.csv`
|
62
|
+
|
63
|
+
# Screenshot
|
64
|
+
- the directory which contains the media files and the csv file
|
65
|
+
![the_result_directory](the_result_directory.jpg "the_result_directory")
|
66
|
+
|
67
|
+
- the csv file
|
68
|
+
![the_csv_file](the_csv_file.png "the_csv_file")
|
69
|
+
|
70
|
+
# Options
|
71
|
+
- the `save` method can take 4 arguments
|
72
|
+
- the first: the directory name to collect [required]
|
73
|
+
- the second: the collect count of tweet per loop (default: 200) [optional]
|
74
|
+
- the third: the loop count to collect media (default: 1) [optional]
|
75
|
+
- Be careful about `API Rate limits`
|
76
|
+
- the fourth: the starting tweet id to collect media (String, default: the latest) [optional]
|
77
|
+
|
78
|
+
- example
|
79
|
+
|
80
|
+
```ruby
|
81
|
+
CollectTwitterMedia.save('media_collection', 100, 4, '12345678901234')
|
82
|
+
```
|
83
|
+
|
84
|
+
# Note
|
85
|
+
- Please, please be careful about `API Rate limits`
|
86
|
+
- [Rate limits — Twitter Developers](https://developer.twitter.com/en/docs/basics/rate-limits)
|
87
|
+
- [GET statuses/home\_timeline — Twitter Developers](https://developer.twitter.com/en/docs/tweets/timelines/api-reference/get-statuses-home_timeline)
|
88
|
+
|
89
|
+
# TODO
|
90
|
+
- collect from any users timeline
|
91
|
+
- collect by keyword search result
|
92
|
+
- collect not only image but also gif and mp4
|
93
|
+
|
94
|
+
## Development
|
95
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
96
|
+
|
97
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
98
|
+
|
99
|
+
## Contributing
|
100
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/corselia/collect-twitter-media.
|
101
|
+
|
102
|
+
## License
|
103
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
104
|
+
|
105
|
+
## History
|
106
|
+
|
107
|
+
#### 2017-12-02
|
108
|
+
- Version 1.1.3
|
109
|
+
- Oops... I changed the repository URI on GitHub... (I forgot)
|
110
|
+
|
111
|
+
#### 2017-12-02
|
112
|
+
- Version 1.1.2
|
113
|
+
- Oops! I didn't know [the naming convention](https://maku77.github.io/ruby/coding-style.html) of Ruby gems, so I reverted the changes made in version `1.1.1`
|
114
|
+
|
115
|
+
#### 2017-12-02
|
116
|
+
- Version 1.1.1
|
117
|
+
- fix minor typos etc. in `README.md`
|
118
|
+
- change repository name from `collect_twitter_media` to `collect-twitter-media`
|
119
|
+
|
120
|
+
#### 2017-11-26
|
121
|
+
- Version 1.1.0
|
122
|
+
- add `created_at` column to the output CSV file
|
123
|
+
|
124
|
+
#### 2017-11-26
|
125
|
+
- Version 1.0.0
|
126
|
+
- first release
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Interactive console with the gem preloaded, for experimenting during development.
require 'bundler/setup'
require 'collect_twitter_media'

# Fixtures and/or initialization code can be added here to make experimenting
# with the gem easier.
#
# To use Pry instead of IRB, add pry to the Gemfile and swap in:
# require "pry"
# Pry.start

require 'irb'
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/circle.yml
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
# Ruby CircleCI 2.0 configuration file
|
2
|
+
#
|
3
|
+
# Check https://circleci.com/docs/2.0/language-ruby/ for more details
|
4
|
+
#
|
5
|
+
version: 2
|
6
|
+
jobs:
|
7
|
+
build:
|
8
|
+
docker:
|
9
|
+
# specify the version you desire here
|
10
|
+
- image: circleci/ruby:2.4.1-node-browsers
|
11
|
+
|
12
|
+
# Specify service dependencies here if necessary
|
13
|
+
# CircleCI maintains a library of pre-built images
|
14
|
+
# documented at https://circleci.com/docs/2.0/circleci-images/
|
15
|
+
# - image: circleci/postgres:9.4
|
16
|
+
|
17
|
+
working_directory: ~/repo
|
18
|
+
branches:
|
19
|
+
only:
|
20
|
+
- master
|
21
|
+
steps:
|
22
|
+
- checkout
|
23
|
+
|
24
|
+
# Download and cache dependencies
|
25
|
+
- restore_cache:
|
26
|
+
keys:
|
27
|
+
- v1-dependencies-{{ checksum "Gemfile.lock" }}
|
28
|
+
# fallback to using the latest cache if no exact match is found
|
29
|
+
- v1-dependencies-
|
30
|
+
|
31
|
+
- run:
|
32
|
+
name: update bundler
|
33
|
+
command: |
|
34
|
+
gem install bundler
|
35
|
+
|
36
|
+
- run:
|
37
|
+
name: install dependencies
|
38
|
+
command: |
|
39
|
+
bundle install --jobs=4 --retry=3 --path vendor/bundle
|
40
|
+
|
41
|
+
- save_cache:
|
42
|
+
paths:
|
43
|
+
- ./vendor/bundle
|
44
|
+
key: v1-dependencies-{{ checksum "Gemfile.lock" }}
|
45
|
+
|
46
|
+
# Database setup
|
47
|
+
# - run: bundle exec rake db:create
|
48
|
+
# - run: bundle exec rake db:schema:load
|
49
|
+
|
50
|
+
# run tests!
|
51
|
+
- run:
|
52
|
+
name: run tests
|
53
|
+
command: |
|
54
|
+
mkdir /tmp/test-results
|
55
|
+
TEST_FILES="$(circleci tests glob "spec/**/*_spec.rb" | circleci tests split --split-by=timings)"
|
56
|
+
|
57
|
+
bundle exec rspec --format progress \
|
58
|
+
# --format RspecJunitFormatter \
|
59
|
+
# --out /tmp/test-results/rspec.xml \
|
60
|
+
# --format progress \
|
61
|
+
# "${TEST_FILES}"
|
62
|
+
|
63
|
+
# collect reports
|
64
|
+
- store_test_results:
|
65
|
+
path: /tmp/test-results
|
66
|
+
- store_artifacts:
|
67
|
+
path: /tmp/test-results
|
68
|
+
destination: test-results
|
@@ -0,0 +1,38 @@
|
|
1
|
+
|
2
|
+
# Put lib/ on the load path so the version constant can be required below.
lib = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "collect_twitter_media/version"

Gem::Specification.new do |spec|
  spec.name     = "collect_twitter_media"
  spec.version  = CollectTwitterMedia::VERSION
  spec.authors  = ["Osamu Takiya"]
  spec.email    = ["takiya@toran.sakura.ne.jp"]
  spec.license  = "MIT"
  spec.homepage = "https://github.com/corselia/collect-twitter-media"

  spec.summary     = "Collect media files from Twitter"
  spec.description = "you can collect the media files such as .jpg, .png ... from Twitter's timeline"

  # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
  # to allow pushing to a single host or delete this section to allow pushing to any host.
  # if spec.respond_to?(:metadata)
  #   spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
  # else
  #   raise "RubyGems 2.0 or newer is required to protect against " \
  #     "public gem pushes."
  # end

  # Package every git-tracked file except the test suites.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }

  # Executables, if any, live under exe/ and are exposed by basename.
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.16"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"

  spec.add_dependency 'twitter'
end
|
@@ -0,0 +1,200 @@
|
|
1
|
+
require 'collect_twitter_media/version'
|
2
|
+
require 'collect_twitter_media/file_operation'
|
3
|
+
require 'twitter'
|
4
|
+
require 'csv'
|
5
|
+
|
6
|
+
# Collects photo attachments from the authenticated user's Twitter home
# timeline, downloads them with `wget`, and records each one in a CSV file.
#
# All methods are callable on the module itself (`extend self`).
module CollectTwitterMedia
  include FileOperation
  extend self

  # Downloads every photo found on the home timeline into +directory+ and
  # writes a CSV index next to the images.
  #
  # directory      - directory name, resolved by make_directory_if_not_exist
  # tweet_count    - number of tweets requested per API call
  # loop_count     - number of API calls to make (mind the API rate limits)
  # start_tweet_id - newest tweet id to start from, as an Integer or a
  #                  numeric String; anything else (default '') starts
  #                  from the latest tweet
  def save(directory, tweet_count=200, loop_count=1, start_tweet_id='') # HACK: too many arguments
    twitter_client
    # Forward every option. Only loop_count used to be passed on, so
    # tweet_count and start_tweet_id were silently ignored.
    tweet_collection = collect_tweets_with_loop(loop_count, tweet_count, start_tweet_id)
    save_directory = make_directory_if_not_exist(directory)
    csv_filename = create_csv_file(save_directory)

    tweet_collection.each do |tweet|
      # A retweet does not carry the correct data, so fetch the original tweet.
      tweet = original_tweet(tweet)
      save_image_file(save_directory, tweet)
      append_csv_row(csv_filename, tweet)
    end
  end

  # Credential setters. Each stores the value used by twitter_client.
  # HACK: verbose; a configuration block would be nicer.
  def consumer_key(value)
    @consumer_key = value
  end

  def consumer_secret(value)
    @consumer_secret = value
  end

  def access_token(value)
    @access_token = value
  end

  def access_token_secret(value)
    @access_token_secret = value
  end

  # Builds the REST client from the stored credentials, memoizes it in
  # @client, and returns it.
  def twitter_client
    @client = Twitter::REST::Client.new do |config|
      config.consumer_key = @consumer_key
      config.consumer_secret = @consumer_secret
      config.access_token = @access_token
      config.access_token_secret = @access_token_secret
    end
  end

  # Fetches one page of the home timeline.
  #
  # until_tweet_id - when numeric (Integer or digit-only String) it is sent as
  #                  max_id, i.e. only tweets whose id is EQUAL TO OR LESS
  #                  THAN it are returned; otherwise the latest tweets are
  #                  returned
  # count          - number of tweets to request
  #
  # On any API error the message is printed and the process exits with status 1.
  def collect_tweets(until_tweet_id='', count=200)
    options = { count: count, include_rts: true, tweet_mode: 'extended' }
    max_id = normalized_tweet_id(until_tweet_id)
    options[:max_id] = max_id if max_id
    @client.home_timeline(options)
  rescue => e
    puts e
    exit(1)
  end

  # Calls collect_tweets up to +loop_count+ times, paging backwards through
  # the timeline, and returns all collected tweets as one flat array.
  def collect_tweets_with_loop(loop_count=1, tweet_count=200, start_tweet_id='')
    tweet_collection = []
    until_tweet_id = start_tweet_id

    loop_count.times do
      tweets = collect_tweets(until_tweet_id, tweet_count)
      break if tweets.empty?
      tweet_collection.concat(tweets)

      # max_id is inclusive, so continue just below the oldest tweet seen.
      until_tweet_id = min_tweet_id(tweet_id_collection(tweets)) - 1
    end
    tweet_collection
  end

  # Returns the ids of +tweets+ in the same order.
  def tweet_id_collection(tweets)
    tweets.map(&:id)
  end

  # Smallest id in +tweet_id_collection+.
  def min_tweet_id(tweet_id_collection)
    tweet_id_collection.min
  end

  # Largest id in +tweet_id_collection+.
  def max_tweet_id(tweet_id_collection)
    tweet_id_collection.max
  end

  # Returns the HTTPS URLs (as Strings) of the photo attachments of +tweet+.
  # Only media that is exactly a Twitter::Media::Photo is included.
  def media_uris(tweet)
    return [] unless tweet.media?

    tweet.media
         .select { |media| media.instance_of?(Twitter::Media::Photo) }
         .map { |media| media.media_url_https.to_s }
  end

  # Appends the ':orig' suffix to +media_uri+.
  def media_original_uri(media_uri)
    "#{media_uri}:orig"
  end

  # Extracts the filename (with extension) from a pbs.twimg.com media URL.
  # Falls back to the last path segment for any other URL instead of
  # failing on a nil match.
  def media_filename(media_uri)
    matched = media_uri.match(/https:\/\/pbs\.twimg\.com\/media\/(.*)\z/)
    matched ? matched[1] : File.basename(media_uri)
  end

  # Returns +tweet+ itself, or for a retweet the original status fetched
  # from the API.
  def original_tweet(tweet)
    if tweet.retweet?
      # HACK: this extra request is sometimes slow to respond
      @client.status(tweet.attrs[:retweeted_status][:id], tweet_mode: "extended")
    else
      tweet
    end
  end

  # Downloads every photo of +tweet+ into +save_directory+ using `wget`.
  def save_image_file(save_directory, tweet)
    media_uri_and_filename(tweet).each do |media_data|
      destination = "#{save_directory}/#{save_filename(media_data)}"
      # Argument-array form: no shell is involved, so spaces or shell
      # metacharacters in the path or URL cannot split or inject commands.
      IO.popen(['wget', '-q', media_data['media_original_uri'], '-O', destination], &:read)
    end
  end

  # Returns one Hash per photo attached to +tweet+ with the keys
  # 'tweet_id', 'media_original_uri', 'media_filename', 'screen_name' and
  # 'created_at'. If a tweet has several images, all of them are listed.
  def media_uri_and_filename(tweet)
    media_uris(tweet).map do |media_uri|
      {
        'tweet_id' => tweet.id,
        'media_original_uri' => media_original_uri(media_uri),
        'media_filename' => media_filename(media_uri),
        'screen_name' => tweet.attrs[:user][:screen_name],
        'created_at' => tweet.created_at,
      }
    end
  end

  # Creates "<base_filename>_<YYYYmmdd_HHMMSS>.csv" inside +save_directory+,
  # writes the header row, and returns the file's path.
  def create_csv_file(save_directory, base_filename='image_from_twitter')
    now_time = Time.now.strftime("%Y%m%d_%H%M%S")
    filename = "#{save_directory}/#{base_filename}_#{now_time}.csv"
    header = [
      'tweet_id',
      'screen_name',
      'original_filename',
      'save_filename',
      'uri',
      'created_at',
    ]
    CSV.open(filename, 'w') { |csv_file| csv_file << header }

    filename
  end

  # Appends one CSV row per photo of +tweet+ to +csv_filename+.
  # HACK: media_uri_and_filename is evaluated here and again in save_image_file
  def append_csv_row(csv_filename, tweet)
    media_list = media_uri_and_filename(tweet)
    return media_list if media_list.empty?

    # Open the file once for all rows of this tweet.
    CSV.open(csv_filename, 'a') do |csv_file|
      media_list.each do |media_data|
        csv_file << [
          media_data['tweet_id'],
          media_data['screen_name'],
          media_data['media_filename'],
          save_filename(media_data),
          media_data['media_original_uri'],
          media_data['created_at'],
        ]
      end
    end
    media_list
  end

  # Deprecated: does not work correctly when a tweet is a retweet.
  def media_uris_and_filenames(tweets)
    tweets.flat_map { |tweet| media_uri_and_filename(tweet) }
  end

  # Not used. Returns the client name from the tweet's source anchor,
  # e.g. "<a href=\"http://twitter.com\" rel=\"nofollow\">Twitter Web Client</a>"
  # gives "Twitter Web Client"; returns nil when the source has another shape.
  def via_client(tweet)
    tweet.source[/\A<a href=".*>(.*)<\/a>\z/, 1]
  end

  private

  # Integer form of +tweet_id+, or nil when it is neither an Integer nor a
  # digit-only String.
  def normalized_tweet_id(tweet_id)
    return tweet_id if tweet_id.is_a?(Integer)

    text = tweet_id.to_s
    text =~ /\A\d+\z/ ? text.to_i : nil
  end

  # Local filename used for a downloaded photo: "@<screen_name>_<tweet_id>_<media_filename>".
  def save_filename(media_data)
    "@#{media_data['screen_name']}_#{media_data['tweet_id']}_#{media_data['media_filename']}"
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
|
3
|
+
# Small filesystem helpers; paths are resolved against the current working directory.
module FileOperation
  # Ensures +directory+ exists and returns its path as a String.
  #
  # The name is resolved with to_pathname. Nothing is created when something
  # already exists at that path. Missing parent directories are created too,
  # and no shell is involved, so names containing spaces or shell
  # metacharacters are handled literally.
  def make_directory_if_not_exist(directory)
    directory = to_pathname(directory)
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    Pathname.new(directory).mkpath unless File.exist?(directory)
    directory
  end

  # Returns the filename of +image_file+ without directory and extension.
  def basename_of_image_file(image_file)
    File.basename(image_file, '.*')
  end

  # Deletes +filename+; raises if the file cannot be deleted.
  def remove_image(filename)
    File.delete(filename)
  end

  # Joins +filename_or_dirname+ onto the current working directory and
  # returns the result as a String.
  def to_pathname(filename_or_dirname)
    Pathname.new(Dir.pwd).join(filename_or_dirname).to_s
  end
end
|
data/the_csv_file.png
ADDED
Binary file
|
Binary file
|
metadata
ADDED
@@ -0,0 +1,120 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: collect_twitter_media
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.1.3
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Osamu Takiya
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-12-02 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.16'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.16'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: twitter
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description: you can collect the media files such as .jpg, .png ... from Twitter's
|
70
|
+
timeline
|
71
|
+
email:
|
72
|
+
- takiya@toran.sakura.ne.jp
|
73
|
+
executables: []
|
74
|
+
extensions: []
|
75
|
+
extra_rdoc_files: []
|
76
|
+
files:
|
77
|
+
- ".coveralls.yml"
|
78
|
+
- ".gitignore"
|
79
|
+
- ".rake_tasks~"
|
80
|
+
- ".rspec"
|
81
|
+
- ".travis.yml"
|
82
|
+
- Gemfile
|
83
|
+
- Gemfile.lock
|
84
|
+
- LICENSE
|
85
|
+
- README.md
|
86
|
+
- Rakefile
|
87
|
+
- bin/console
|
88
|
+
- bin/setup
|
89
|
+
- circle.yml
|
90
|
+
- collect_twitter_media.gemspec
|
91
|
+
- lib/collect_twitter_media.rb
|
92
|
+
- lib/collect_twitter_media/file_operation.rb
|
93
|
+
- lib/collect_twitter_media/version.rb
|
94
|
+
- the_csv_file.png
|
95
|
+
- the_result_directory.jpg
|
96
|
+
homepage: https://github.com/corselia/collect-twitter-media
|
97
|
+
licenses:
|
98
|
+
- MIT
|
99
|
+
metadata: {}
|
100
|
+
post_install_message:
|
101
|
+
rdoc_options: []
|
102
|
+
require_paths:
|
103
|
+
- lib
|
104
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
105
|
+
requirements:
|
106
|
+
- - ">="
|
107
|
+
- !ruby/object:Gem::Version
|
108
|
+
version: '0'
|
109
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
110
|
+
requirements:
|
111
|
+
- - ">="
|
112
|
+
- !ruby/object:Gem::Version
|
113
|
+
version: '0'
|
114
|
+
requirements: []
|
115
|
+
rubyforge_project:
|
116
|
+
rubygems_version: 2.7.2
|
117
|
+
signing_key:
|
118
|
+
specification_version: 4
|
119
|
+
summary: Collect media files from Twitter
|
120
|
+
test_files: []
|