collect_twitter_media 1.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: c6b70cef56555211e910daa5d8b8fb352229c96ac575b74670a92c4eb8ace194
4
+ data.tar.gz: 3dc5335a8b1eea26c012058cec2c4844873be0c78dd0f31dc99089fbc14a41f0
5
+ SHA512:
6
+ metadata.gz: 632c049587ab4fa83b711e6a2c7e2984cf2333cdf304fc9a795e09f30f7cdbf9bc9028afb4c408d300fa4ed032bb5105668927dd55b593f44ece8c7a8215d201
7
+ data.tar.gz: 13d26ca2af15d809a284232dfcaad99fa677eabf74d9283baae4f1b93b37996171059edc82325c5a1591e6f9591e847864e637d077608f5162963ec553db944f
data/.coveralls.yml ADDED
@@ -0,0 +1 @@
1
+ # .coveralls.yml
data/.gitignore ADDED
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
data/.rake_tasks~ ADDED
@@ -0,0 +1,7 @@
1
+ build
2
+ clean
3
+ clobber
4
+ install
5
+ install:local
6
+ release[remote]
7
+ spec
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.4.1
5
+ before_install: gem install bundler -v 1.16.0
data/Gemfile ADDED
@@ -0,0 +1,9 @@
1
+ source "https://rubygems.org"
2
+
3
+ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
4
+
5
+ # Specify your gem's dependencies in collect_twitter_media.gemspec
6
+ gemspec
7
+
8
+ gem 'coveralls', require: false
9
+ gem 'codecov', :require => false, :group => :test
data/Gemfile.lock ADDED
@@ -0,0 +1,90 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ collect_twitter_media (1.0.0)
5
+ twitter
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ addressable (2.5.2)
11
+ public_suffix (>= 2.0.2, < 4.0)
12
+ buftok (0.2.0)
13
+ coveralls (0.8.21)
14
+ json (>= 1.8, < 3)
15
+ simplecov (~> 0.14.1)
16
+ term-ansicolor (~> 1.3)
17
+ thor (~> 0.19.4)
18
+ tins (~> 1.6)
19
+ diff-lcs (1.3)
20
+ docile (1.1.5)
21
+ domain_name (0.5.20170404)
22
+ unf (>= 0.0.5, < 1.0.0)
23
+ equalizer (0.0.11)
24
+ http (3.0.0)
25
+ addressable (~> 2.3)
26
+ http-cookie (~> 1.0)
27
+ http-form_data (>= 2.0.0.pre.pre2, < 3)
28
+ http_parser.rb (~> 0.6.0)
29
+ http-cookie (1.0.3)
30
+ domain_name (~> 0.5)
31
+ http-form_data (2.0.0)
32
+ http_parser.rb (0.6.0)
33
+ json (2.1.0)
34
+ memoizable (0.4.2)
35
+ thread_safe (~> 0.3, >= 0.3.1)
36
+ multipart-post (2.0.0)
37
+ naught (1.1.0)
38
+ public_suffix (3.0.1)
39
+ rake (10.5.0)
40
+ rspec (3.7.0)
41
+ rspec-core (~> 3.7.0)
42
+ rspec-expectations (~> 3.7.0)
43
+ rspec-mocks (~> 3.7.0)
44
+ rspec-core (3.7.0)
45
+ rspec-support (~> 3.7.0)
46
+ rspec-expectations (3.7.0)
47
+ diff-lcs (>= 1.2.0, < 2.0)
48
+ rspec-support (~> 3.7.0)
49
+ rspec-mocks (3.7.0)
50
+ diff-lcs (>= 1.2.0, < 2.0)
51
+ rspec-support (~> 3.7.0)
52
+ rspec-support (3.7.0)
53
+ simple_oauth (0.3.1)
54
+ simplecov (0.14.1)
55
+ docile (~> 1.1.0)
56
+ json (>= 1.8, < 3)
57
+ simplecov-html (~> 0.10.0)
58
+ simplecov-html (0.10.2)
59
+ term-ansicolor (1.6.0)
60
+ tins (~> 1.0)
61
+ thor (0.19.4)
62
+ thread_safe (0.3.6)
63
+ tins (1.15.1)
64
+ twitter (6.2.0)
65
+ addressable (~> 2.3)
66
+ buftok (~> 0.2.0)
67
+ equalizer (~> 0.0.11)
68
+ http (~> 3.0)
69
+ http-form_data (~> 2.0)
70
+ http_parser.rb (~> 0.6.0)
71
+ memoizable (~> 0.4.0)
72
+ multipart-post (~> 2.0)
73
+ naught (~> 1.0)
74
+ simple_oauth (~> 0.3.0)
75
+ unf (0.1.4)
76
+ unf_ext
77
+ unf_ext (0.0.7.4)
78
+
79
+ PLATFORMS
80
+ ruby
81
+
82
+ DEPENDENCIES
83
+ bundler (~> 1.16)
84
+ collect_twitter_media!
85
+ coveralls
86
+ rake (~> 10.0)
87
+ rspec (~> 3.0)
88
+
89
+ BUNDLED WITH
90
+ 1.16.0
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2017 Osamu Takiya
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,126 @@
1
+ [![CircleCI](https://circleci.com/gh/corselia/collect_twitter_media/tree/master.svg?style=svg)](https://circleci.com/gh/corselia/collect_twitter_media/tree/master) [![codecov](https://codecov.io/gh/corselia/collect_twitter_media/branch/master/graph/badge.svg)](https://codecov.io/gh/corselia/collect_twitter_media) [![Coverage Status](https://coveralls.io/repos/github/corselia/collect_twitter_media/badge.svg)](https://coveralls.io/github/corselia/collect_twitter_media)
2
+
3
+ # Overview
4
+ - You can collect media files (sorry, only image files for now; gif and mp4 are not supported yet)
5
+ - The source account is your own (media files are collected from your home timeline)
6
+
7
+ # Required
8
+ - `wget` command
9
+
10
+ # Installation
11
+ ```sh
12
+ $ gem install collect_twitter_media
13
+ ```
14
+
15
+ # Usage
16
+
17
+ #### 1. require gem
18
+ ```ruby
19
+ require 'collect_twitter_media'
20
+ ```
21
+
22
+ #### 2. set your Twitter API token
23
+ - set the Twitter API token
24
+
25
+ ```ruby
26
+ CollectTwitterMedia.consumer_key('YOUR_CONSUMER_KEY')
27
+ CollectTwitterMedia.consumer_secret('YOUR_CONSUMER_SECRET')
28
+ CollectTwitterMedia.access_token('YOUR_ACCESS_TOKEN')
29
+ CollectTwitterMedia.access_token_secret('YOUR_ACCESS_TOKEN_SECRET')
30
+ ```
31
+
32
+ #### 3. 🎉exec `save` method🎉
33
+ - the first argument is the name of the directory to save the files into
34
+
35
+ ```ruby
36
+ CollectTwitterMedia.save('media_collection')
37
+ ```
38
+
39
+ #### 4. 🎆you have collected the media files and the csv file!🎆
40
+ - in the directory you specified, the media files have been collected
41
+ - the format of filename has 3 parts
42
+ - `@twitter_screen_name`
43
+ - `tweet_id`
44
+ - `media_id`
45
+ - the example of filename
46
+ - `@dhh_934687159870177697_dPi3GGKVoAERExB.jpg`
47
+ - `@tenderlove_924694994511770368_IPi5IX-UEAAoL4v.jpg`
48
+ - in the directory you specified, the csv file has been created
49
+ - the columns of this csv file
50
+ - `tweet_id`
51
+ - `screen_name`
52
+ - `original_filename`
53
+ - `save_filename`
54
+ - `uri`
55
+ - `created_at`
56
+ - the filename of this csv file has 2 parts
57
+ - `image_from_twitter`
58
+ - `%Y%m%d_%H%M%S`
59
+ - the example of filename
60
+ - `image_from_twitter_20171126_171717.csv`
61
+ - `image_from_twitter_20171224_210000.csv`
62
+
63
+ # Screenshot
64
+ - the directory which contains the media files and the csv file
65
+ ![the_result_directory](the_result_directory.jpg "the_result_directory")
66
+
67
+ - the csv file
68
+ ![the_csv_file](the_csv_file.png "the_csv_file")
69
+
70
+ # Options
71
+ - the `save` method can take 4 arguments
72
+ - the first: the directory name to collect [required]
73
+ - the second: the number of tweets to fetch per loop (default: 200) [optional]
74
+ - the third: the loop count to collect media (default: 1) [optional]
75
+ - Be careful about `API Rate limits`
76
+ - the fourth: the tweet id to start collecting media from (String, default: the latest) [optional]
77
+
78
+ - example
79
+
80
+ ```ruby
81
+ CollectTwitterMedia.save('media_collection', 100, 4, '12345678901234')
82
+ ```
83
+
84
+ # Note
85
+ - Please, please be careful about `API Rate limits`
86
+ - [Rate limits — Twitter Developers](https://developer.twitter.com/en/docs/basics/rate-limits)
87
+ - [GET statuses/home\_timeline — Twitter Developers](https://developer.twitter.com/en/docs/tweets/timelines/api-reference/get-statuses-home_timeline)
88
+
89
+ # TODO
90
+ - collect from any user's timeline
91
+ - collect by keyword search result
92
+ - collect not only image but also gif and mp4
93
+
94
+ ## Development
95
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
96
+
97
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
98
+
99
+ ## Contributing
100
+ Bug reports and pull requests are welcome on GitHub at https://github.com/corselia/collect-twitter-media.
101
+
102
+ ## License
103
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
104
+
105
+ ## History
106
+
107
+ #### 2017-12-02
108
+ - Version 1.1.3
109
+ - Oops... I changed the repository URI on GitHub... (I forgot)
110
+
111
+ #### 2017-12-02
112
+ - Version 1.1.2
113
+ - Oops! I didn't know [the naming convention](https://maku77.github.io/ruby/coding-style.html) of Ruby gems, so I reverted the changes made in version `1.1.1`
114
+
115
+ #### 2017-12-02
116
+ - Version 1.1.1
117
+ - fix minor typos etc. in `README.md`
118
+ - change repository name from `collect_twitter_media` to `collect-twitter-media`
119
+
120
+ #### 2017-11-26
121
+ - Version 1.1.0
122
+ - add `created_at` column to the output CSV file
123
+
124
+ #### 2017-11-26
125
+ - Version 1.0.0
126
+ - first release
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "collect_twitter_media"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
data/circle.yml ADDED
@@ -0,0 +1,68 @@
1
+ # Ruby CircleCI 2.0 configuration file
2
+ #
3
+ # Check https://circleci.com/docs/2.0/language-ruby/ for more details
4
+ #
5
+ version: 2
6
+ jobs:
7
+ build:
8
+ docker:
9
+ # specify the version you desire here
10
+ - image: circleci/ruby:2.4.1-node-browsers
11
+
12
+ # Specify service dependencies here if necessary
13
+ # CircleCI maintains a library of pre-built images
14
+ # documented at https://circleci.com/docs/2.0/circleci-images/
15
+ # - image: circleci/postgres:9.4
16
+
17
+ working_directory: ~/repo
18
+ branches:
19
+ only:
20
+ - master
21
+ steps:
22
+ - checkout
23
+
24
+ # Download and cache dependencies
25
+ - restore_cache:
26
+ keys:
27
+ - v1-dependencies-{{ checksum "Gemfile.lock" }}
28
+ # fallback to using the latest cache if no exact match is found
29
+ - v1-dependencies-
30
+
31
+ - run:
32
+ name: update bundler
33
+ command: |
34
+ gem install bundler
35
+
36
+ - run:
37
+ name: install dependencies
38
+ command: |
39
+ bundle install --jobs=4 --retry=3 --path vendor/bundle
40
+
41
+ - save_cache:
42
+ paths:
43
+ - ./vendor/bundle
44
+ key: v1-dependencies-{{ checksum "Gemfile.lock" }}
45
+
46
+ # Database setup
47
+ # - run: bundle exec rake db:create
48
+ # - run: bundle exec rake db:schema:load
49
+
50
+ # run tests!
51
+ - run:
52
+ name: run tests
53
+ command: |
54
+ mkdir /tmp/test-results
55
+ TEST_FILES="$(circleci tests glob "spec/**/*_spec.rb" | circleci tests split --split-by=timings)"
56
+
57
+ bundle exec rspec --format progress \
58
+ # --format RspecJunitFormatter \
59
+ # --out /tmp/test-results/rspec.xml \
60
+ # --format progress \
61
+ # "${TEST_FILES}"
62
+
63
+ # collect reports
64
+ - store_test_results:
65
+ path: /tmp/test-results
66
+ - store_artifacts:
67
+ path: /tmp/test-results
68
+ destination: test-results
@@ -0,0 +1,38 @@
1
+
2
# Put this gem's lib/ directory on the load path so the version constant
# can be required before the gem is installed.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require "collect_twitter_media/version"

Gem::Specification.new do |gem|
  # Identity
  gem.name    = "collect_twitter_media"
  gem.version = CollectTwitterMedia::VERSION
  gem.authors = ["Osamu Takiya"]
  gem.email   = ["takiya@toran.sakura.ne.jp"]

  # Description shown on the gem host
  gem.summary     = "Collect media files from Twitter"
  gem.description = "you can collect the media files such as .jpg, .png ... from Twitter's timeline"
  gem.homepage    = "https://github.com/corselia/collect-twitter-media"
  gem.license     = "MIT"

  # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
  # to allow pushing to a single host or delete this section to allow pushing to any host.
  # if gem.respond_to?(:metadata)
  #   gem.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
  # else
  #   raise "RubyGems 2.0 or newer is required to protect against " \
  #     "public gem pushes."
  # end

  # Package every git-tracked file except the test suites.
  tracked_files = `git ls-files -z`.split("\x0")
  gem.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }

  # Executables are whatever lives under exe/ (bin/ holds development scripts only).
  gem.bindir        = "exe"
  gem.executables   = gem.files.grep(%r{^exe/}) { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  # Development-only tooling
  gem.add_development_dependency "bundler", "~> 1.16"
  gem.add_development_dependency "rake", "~> 10.0"
  gem.add_development_dependency "rspec", "~> 3.0"

  # Runtime dependency
  gem.add_dependency 'twitter'
end
@@ -0,0 +1,200 @@
1
+ require 'collect_twitter_media/version'
2
+ require 'collect_twitter_media/file_operation'
3
+ require 'twitter'
4
+ require 'csv'
5
+
6
# Collects the photos attached to tweets on the authenticated user's home
# timeline, downloads them with the external `wget` command, and records
# every downloaded file in a CSV file stored next to the images.
#
# Every method is callable directly on the module (`extend self`).
module CollectTwitterMedia
  include FileOperation
  extend self

  # Downloads every photo found on the home timeline and writes a CSV index.
  #
  # directory      - name of the directory to save into (created when missing
  #                  by FileOperation#make_directory_if_not_exist)
  # tweet_count    - number of tweets requested per API call
  # loop_count     - number of timeline requests (pages) to make
  # start_tweet_id - newest tweet id to start from, as an Integer or a
  #                  numeric String; blank means "start from the latest"
  #
  # Returns the Array of collected tweets.
  def save(directory, tweet_count=200, loop_count=1, start_tweet_id='') # HACK: too many argv
    twitter_client
    # tweet_count and start_tweet_id must be forwarded; previously only
    # loop_count was passed on, so the other two options had no effect.
    tweet_collection = collect_tweets_with_loop(loop_count, tweet_count, start_tweet_id)
    save_directory   = make_directory_if_not_exist(directory)
    csv_filename     = create_csv_file(save_directory)

    tweet_collection.each do |tweet|
      # A retweet does not carry the correct data, so use the original tweet.
      tweet = original_tweet(tweet)
      save_image_file(save_directory, tweet)
      append_csv_row(csv_filename, tweet)
    end
  end

  # Credential setters. Each stores the value for the next call to
  # #twitter_client and returns it.
  # HACK: TOO LONG...(can I use block?)
  def consumer_key(value)
    @consumer_key = value
  end

  def consumer_secret(value)
    @consumer_secret = value
  end

  def access_token(value)
    @access_token = value
  end

  def access_token_secret(value)
    @access_token_secret = value
  end

  # Builds a Twitter REST client from the stored credentials, keeps it in
  # @client and returns it.
  def twitter_client
    @client = Twitter::REST::Client.new do |config|
      config.consumer_key        = @consumer_key
      config.consumer_secret     = @consumer_secret
      config.access_token        = @access_token
      config.access_token_secret = @access_token_secret
    end
  end

  # Fetches one page of the home timeline.
  #
  # until_tweet_id - when it is an Integer or a numeric String it is sent as
  #                  `max_id` (tweets with an id EQUAL TO OR LESS THAN it);
  #                  anything else requests the newest tweets
  # count          - number of tweets to request
  #
  # NOTE: any error is printed and the whole process exits with status 1;
  # this is the existing contract and is kept for compatibility.
  def collect_tweets(until_tweet_id='', count=200)
    options = { count: count, include_rts: true, tweet_mode: 'extended' }
    max_id = normalize_tweet_id(until_tweet_id)
    options[:max_id] = max_id if max_id
    @client.home_timeline(options)
  rescue => e
    puts e
    exit(1)
  end

  # Fetches up to +loop_count+ consecutive pages, walking backwards in time,
  # and returns all tweets in one flat Array. Stops early on an empty page.
  def collect_tweets_with_loop(loop_count=1, tweet_count=200, start_tweet_id='')
    tweet_collection = []
    until_tweet_id = start_tweet_id

    loop_count.times do
      tweets = collect_tweets(until_tweet_id, tweet_count)
      break if tweets.empty?
      tweet_collection.concat(tweets)

      # max_id is inclusive, so step one below the oldest tweet just fetched.
      until_tweet_id = min_tweet_id(tweet_id_collection(tweets)) - 1
    end
    tweet_collection
  end

  # Returns the ids of the given tweets, in order.
  def tweet_id_collection(tweets)
    tweets.map(&:id)
  end

  # Returns the smallest id in the collection.
  def min_tweet_id(tweet_id_collection)
    tweet_id_collection.min
  end

  # Returns the largest id in the collection.
  def max_tweet_id(tweet_id_collection)
    tweet_id_collection.max
  end

  # Returns the https URLs (Strings) of the photos attached to +tweet+.
  # Media that is not exactly a Twitter::Media::Photo is skipped.
  def media_uris(tweet)
    return [] unless tweet.media?

    tweet.media
         .select { |media| media.instance_of?(Twitter::Media::Photo) }
         .map { |media| media.media_url_https.to_s }
  end

  # Appends the ":orig" suffix to a media URL.
  def media_original_uri(media_uri)
    "#{media_uri}:orig"
  end

  # Extracts the file name (with extension) from a pbs.twimg.com media URL.
  # Raises NoMethodError when the URL does not have that form.
  def media_filename(media_uri)
    media_uri.match(%r{https://pbs\.twimg\.com/media/(.*)\z})[1] # with extension
  end

  # Returns the tweet that actually carries the media: the retweeted status
  # for a retweet (fetched with an extra API call), otherwise +tweet+ itself.
  def original_tweet(tweet)
    if tweet.retweet?
      @client.status(tweet.attrs[:retweeted_status][:id], tweet_mode: "extended") # HACK: this occurs be slow response sometimes
    else
      tweet
    end
  end

  # Downloads every photo of +tweet+ into +save_directory+ using `wget`.
  #
  # wget is started with an argument list rather than a shell command line,
  # so screen names, file names and directory names are never interpreted by
  # a shell (no injection, and paths containing spaces work).
  def save_image_file(save_directory, tweet)
    media_uri_and_filename(tweet).each do |media_data|
      destination = File.join(save_directory, save_filename(media_data))
      downloaded = system('wget', '-q', media_data['media_original_uri'], '-O', destination)
      warn "wget failed for #{media_data['media_original_uri']}" unless downloaded
    end
  end

  # Builds one Hash per photo attached to +tweet+ with the keys 'tweet_id',
  # 'media_original_uri', 'media_filename', 'screen_name' and 'created_at'.
  # If several images are attached, all of them are returned.
  def media_uri_and_filename(tweet)
    media_uris(tweet).map do |media_uri|
      {
        'tweet_id'           => tweet.id,
        'media_original_uri' => media_original_uri(media_uri),
        'media_filename'     => media_filename(media_uri),
        'screen_name'        => tweet.attrs[:user][:screen_name],
        'created_at'         => tweet.created_at,
      }
    end
  end

  # Creates "<save_directory>/<base_filename>_<YYYYmmdd_HHMMSS>.csv"
  # containing only the header row and returns its path.
  def create_csv_file(save_directory, base_filename='image_from_twitter')
    now_time = Time.now.strftime("%Y%m%d_%H%M%S")
    filename = "#{save_directory}/#{base_filename}_#{now_time}.csv"
    header = [
      'tweet_id',
      'screen_name',
      'original_filename',
      'save_filename',
      'uri',
      'created_at',
    ]
    CSV.open(filename, 'w') { |csv_file| csv_file << header }

    filename
  end

  # Appends one CSV row per photo attached to +tweet+.
  # The file is opened once per tweet instead of once per row.
  # HACK: 'media_uri_and_filename' method is duplicated in 'save_image_file' method
  def append_csv_row(csv_filename, tweet)
    media_collection = media_uri_and_filename(tweet)
    return media_collection if media_collection.empty?

    CSV.open(csv_filename, 'a') do |csv_file|
      media_collection.each do |media_data|
        csv_file << [
          media_data['tweet_id'],
          media_data['screen_name'],
          media_data['media_filename'],
          save_filename(media_data),
          media_data['media_original_uri'],
          media_data['created_at'],
        ]
      end
    end
    media_collection
  end

  # deprecated method because when tweet is retweet it doesn't work correctly
  def media_uris_and_filenames(tweets)
    tweets.flat_map { |tweet| media_uri_and_filename(tweet) }
  end

  # not used
  # Extracts the client name from the tweet's source anchor tag.
  def via_client(tweet)
    source = tweet.source # ex. "<a href=\"http://twitter.com\" rel=\"nofollow\">Twitter Web Client</a>"
    source.match(/\A<a href=".*>(.*)<\/a>\z/)[1]
  end

  private

  # Converts a tweet id given as an Integer or a numeric String to an
  # Integer. Returns nil for anything else (nil, '', non-numeric text).
  def normalize_tweet_id(tweet_id)
    return tweet_id if tweet_id.is_a?(Integer)

    text = tweet_id.to_s.strip
    text =~ /\A\d+\z/ ? text.to_i : nil
  end

  # File name used on disk and in the CSV:
  # "@<screen_name>_<tweet_id>_<media_filename>".
  def save_filename(media_data)
    "@#{media_data['screen_name']}_#{media_data['tweet_id']}_#{media_data['media_filename']}"
  end
end
@@ -0,0 +1,24 @@
1
+ require 'pathname'
2
+
3
# Small filesystem helpers mixed into CollectTwitterMedia.
module FileOperation
  # Makes sure +directory+ exists and returns its absolute path as a String.
  # Relative names are resolved against Dir.pwd (see #to_pathname).
  #
  # The directory is created with Pathname#mkpath instead of a shell
  # `mkdir`, so names with spaces or shell metacharacters are safe and
  # missing parent directories are created too.
  def make_directory_if_not_exist(directory)
    directory = to_pathname(directory)
    # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
    Pathname.new(directory).mkpath unless File.exist?(directory)
    directory
  end

  # Returns the file name of +image_file+ without directory and extension.
  def basename_of_image_file(image_file)
    File.basename(image_file, '.*')
  end

  # Deletes +filename+; raises the usual Errno error when that fails.
  def remove_image(filename)
    File.delete(filename)
  end

  # Joins +filename_or_dirname+ onto the current working directory and
  # returns the result as a String. An absolute argument is returned as is.
  def to_pathname(filename_or_dirname)
    Pathname.new(Dir.pwd).join(filename_or_dirname).to_s
  end
end
@@ -0,0 +1,3 @@
1
module CollectTwitterMedia
  # Release number of the gem; the gemspec reads it as the gem version.
  VERSION = '1.1.3'
end
data/the_csv_file.png ADDED
Binary file
Binary file
metadata ADDED
@@ -0,0 +1,120 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: collect_twitter_media
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.1.3
5
+ platform: ruby
6
+ authors:
7
+ - Osamu Takiya
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2017-12-02 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.16'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.16'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: twitter
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: you can collect the media files such as .jpg, .png ... from Twitter's
70
+ timeline
71
+ email:
72
+ - takiya@toran.sakura.ne.jp
73
+ executables: []
74
+ extensions: []
75
+ extra_rdoc_files: []
76
+ files:
77
+ - ".coveralls.yml"
78
+ - ".gitignore"
79
+ - ".rake_tasks~"
80
+ - ".rspec"
81
+ - ".travis.yml"
82
+ - Gemfile
83
+ - Gemfile.lock
84
+ - LICENSE
85
+ - README.md
86
+ - Rakefile
87
+ - bin/console
88
+ - bin/setup
89
+ - circle.yml
90
+ - collect_twitter_media.gemspec
91
+ - lib/collect_twitter_media.rb
92
+ - lib/collect_twitter_media/file_operation.rb
93
+ - lib/collect_twitter_media/version.rb
94
+ - the_csv_file.png
95
+ - the_result_directory.jpg
96
+ homepage: https://github.com/corselia/collect-twitter-media
97
+ licenses:
98
+ - MIT
99
+ metadata: {}
100
+ post_install_message:
101
+ rdoc_options: []
102
+ require_paths:
103
+ - lib
104
+ required_ruby_version: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - ">="
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ required_rubygems_version: !ruby/object:Gem::Requirement
110
+ requirements:
111
+ - - ">="
112
+ - !ruby/object:Gem::Version
113
+ version: '0'
114
+ requirements: []
115
+ rubyforge_project:
116
+ rubygems_version: 2.7.2
117
+ signing_key:
118
+ specification_version: 4
119
+ summary: Collect media files from Twitter
120
+ test_files: []