turl 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: edc78b86ab00971907c4135b49e0aa815fbf50f8ca0c51331811a528c2ba0733
4
+ data.tar.gz: 84c27498e752a7bd98ec9e944d0b446bfad8c249b66ce29d01405f6f9403db3e
5
+ SHA512:
6
+ metadata.gz: d43560abcea1393bf44d5f18cedd04ec7c433eb83553f256f5d86dc73193d25153a226c7b8221e0339387223f8cc1880f169a5cc2652a691e8c1e80e5f81db70
7
+ data.tar.gz: 111723347b196e5bbd810ae14948524149e34011611d2abd99bb0c90a6d8d6d828766a0f10b59d42e2a31be02b7521b826e286a90d90155aa19065d60df37aae
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+ /Gemfile.lock
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in turl.gemspec
4
+ gemspec
5
+
6
+ gem "rake", "~> 12.0"
@@ -0,0 +1,36 @@
1
+ # Turl
2
+
3
+ Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/turl`. To experiment with that code, run `bin/console` for an interactive prompt.
4
+
5
+ TODO: Delete this and the text above, and describe your gem
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'turl'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle install
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install turl
22
+
23
+ ## Usage
24
+
25
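+ Set the `TURL_CONSUMER_KEY`, `TURL_CONSUMER_SECRET`, `TURL_ACCESS_TOKEN` and `TURL_ACCESS_TOKEN_SECRET` environment variables, run `turl-collect` to keep collecting the links that appear in your Twitter home timeline, and run `turl-web` to browse the links updated during the last week.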
26
+
27
+ ## Development
28
+
29
+ After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
30
+
31
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
32
+
33
+ ## Contributing
34
+
35
+ Bug reports and pull requests are welcome on GitHub at https://github.com/pocke/turl.
36
+
@@ -0,0 +1,2 @@
1
require "bundler/gem_tasks"

# Nothing in this Rakefile (or in bundler/gem_tasks) defines a :spec task, so
# a bare `rake` used to abort with "Don't know how to build task 'spec'".
# Point the default at :build, which bundler/gem_tasks provides.
task default: :build
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "turl"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ Turl.prepare_database!
14
+
15
+ require "irb"
16
+ IRB.start(__FILE__)
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,5 @@
1
#!/usr/bin/env ruby

# Command-line entry point for the collector: hands the raw arguments to
# Turl::Collect.run.
require 'turl'

Turl::Collect.run(ARGV)
@@ -0,0 +1,7 @@
1
#!/usr/bin/env ruby

# Command-line entry point for the web UI: hands the raw arguments to
# Turl::Web.run.
require 'turl'
require 'turl/web'
require 'erubi'

Turl::Web.run(ARGV)
@@ -0,0 +1,96 @@
1
+ require 'twitter'
2
+ require 'logger'
3
+ require 'pathname'
4
+ require 'active_record'
5
+ require 'nokogiri'
6
+ require 'open-uri'
7
+ require 'net/http'
8
+
9
+ require "turl/version"
10
+ require 'turl/collect'
11
+ require 'turl/application_record'
12
+ require 'turl/link'
13
+ require 'turl/tweet'
14
+ require 'turl/twitter_user'
15
+ require 'turl/tweet_link'
16
+ require 'turl/url_normalization'
17
+ require 'turl/normalizer'
18
+
19
# Top-level namespace of the gem. Holds the location of the local SQLite
# database, the schema bootstrap and the shared logger.
module Turl
  # Directory that holds the gem's local state.
  CACHE_PATH = Pathname('~/.cache/turl').expand_path
  # SQLite file ActiveRecord::Base is connected to by .prepare_database!.
  DATABASE_PATH = CACHE_PATH / 'db.sqlite3'

  # Complete schema as `;`-separated statements. Every statement uses
  # `if not exists`, so the whole script can be replayed against an existing
  # (or partially initialised) database without failing.
  SCHEMA_SQL = <<~SQL
    create table if not exists links (
      id integer primary key,

      normalized_url text not null,
      title text,

      created_at datetime not null,
      updated_at datetime not null
    );

    create unique index if not exists uniq_links_url_tweet on links(normalized_url);

    create table if not exists tweets (
      id integer primary key,
      twitter_id text not null,

      twitter_user_id integer not null,
      content text not null,
      tweeted_at datetime not null,

      created_at datetime not null,
      updated_at datetime not null
    );

    create unique index if not exists uniq_tweets_twitter_id on tweets(twitter_id);

    create table if not exists tweet_links (
      id integer primary key,

      tweet_id integer not null,
      link_id integer not null,

      created_at datetime not null,
      updated_at datetime not null
    );

    create unique index if not exists uniq_tweet_links_tweet_link on tweet_links(tweet_id, link_id);

    create table if not exists twitter_users (
      id integer primary key,
      twitter_id text not null,

      screen_name text not null,

      created_at datetime not null,
      updated_at datetime not null
    );

    create unique index if not exists uniq_twitter_users_twitter_id on twitter_users(twitter_id);

    create table if not exists url_normalizations (
      id integer primary key,

      original_url text not null,
      normalized_url text not null,

      created_at datetime not null,
      updated_at datetime not null
    );

    create unique index if not exists uniq_url_normalizations_original_url on url_normalizations(original_url);
  SQL

  # Creates the cache directory, connects ActiveRecord to the SQLite database
  # and makes sure every table and index exists.
  #
  # The schema used to be created only when the database file was missing, so
  # a bootstrap that was interrupted after the file had been created left the
  # database permanently incomplete. The statements are idempotent now and
  # are applied on every call.
  def self.prepare_database!
    CACHE_PATH.mkpath
    ActiveRecord::Base.establish_connection(adapter: 'sqlite3', database: DATABASE_PATH.to_s)

    SCHEMA_SQL.split(';').map(&:strip).reject(&:empty?).each do |statement|
      ActiveRecord::Base.connection.execute(statement)
    end
  end

  # @return [Logger] memoized logger shared by the gem; writes to standard output
  def self.logger
    @logger ||= Logger.new($stdout)
  end
end
@@ -0,0 +1,5 @@
1
module Turl
  # Abstract base class for the gem's ActiveRecord models; it is marked
  # abstract, so it has no table of its own.
  class ApplicationRecord < ActiveRecord::Base
    self.abstract_class = true
  end
end
@@ -0,0 +1,46 @@
1
module Turl
  # Polls the Twitter home timeline and stores every tweet that carries at
  # least one URL, together with its (non-ignored) links.
  class Collect
    # Prepares the database and starts the polling loop.
    #
    # @param argv [Array<String>] command-line arguments (currently unused)
    def self.run(argv)
      Turl.prepare_database!
      new.run(argv)
    end

    # Fetches the timeline forever, sleeping 60 seconds between rounds and
    # remembering the newest tweet id that has been seen so far.
    #
    # @param argv [Array<String>] command-line arguments (currently unused)
    def run(argv)
      since_id = nil
      loop do
        new_since_id = fetch_and_save(since_id: since_id)
        # An empty round returns nil; keep the previous marker in that case.
        since_id = new_since_id if new_since_id
        sleep 60
      end
    end

    # Fetches up to 200 tweets from the home timeline and persists them.
    #
    # @param since_id [Integer, nil] only tweets newer than this id are
    #   requested; nil requests the latest tweets without a lower bound
    # @return [Integer, nil] id of the first tweet in the response, or nil
    #   when the response was empty
    def fetch_and_save(since_id:)
      Turl.logger.info "start Turl::Collect#fetch_and_save(since_id: #{since_id.inspect})"

      # since_id used to be logged but never sent, so every round fetched the
      # same tweets again. Forward it whenever one is known.
      options = { count: 200 }
      options[:since_id] = since_id if since_id

      tweets = client.home_timeline(options)
      tweets.each do |tweet_resp|
        next if tweet_resp.urls.empty?

        tweet = Tweet.from_response!(tweet_resp)

        tweet_resp.urls.each do |url|
          next if Link.ignored?(url)

          Link.from_response!(url, tweet)
        end
      end
      Turl.logger.info "done Turl::Collect#fetch_and_save(since_id: #{since_id.inspect})"
      tweets.first&.id
    end

    # Memoized REST client; credentials are read from the TURL_* environment
    # variables and a missing one raises KeyError (ENV.fetch).
    private def client
      @client ||= Twitter::REST::Client.new do |config|
        config.consumer_key = ENV.fetch('TURL_CONSUMER_KEY')
        config.consumer_secret = ENV.fetch('TURL_CONSUMER_SECRET')
        config.access_token = ENV.fetch('TURL_ACCESS_TOKEN')
        config.access_token_secret = ENV.fetch('TURL_ACCESS_TOKEN_SECRET')
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
module Turl
  # A normalized URL that appeared in at least one collected tweet.
  class Link < ApplicationRecord
    has_many :tweet_links
    has_many :tweets, through: :tweet_links

    # TODO: Make it configurable
    #
    # @param url [#expanded_url] URL entity of a tweet
    # @return [Boolean] true when the expanded URL's host is exactly twitter.com
    def self.ignored?(url)
      url.expanded_url.host == 'twitter.com'
    end

    # Finds or creates the link for a tweet's URL entity and attaches the
    # tweet to it. The page title is looked up only for a new link.
    #
    # @param resp [#expanded_url] URL entity of a tweet
    # @param tweet [Turl::Tweet] tweet the URL was found in
    # @return [Turl::Link] the persisted link
    def self.from_response!(resp, tweet)
      link = find_or_initialize_by(normalized_url: normalize(resp.expanded_url.to_s))

      if link.new_record?
        link.update!(title: fetch_title(link.normalized_url))
      else
        link.save!
      end

      link.tweets << tweet unless link.tweets.include?(tweet)
      link
    end

    # @param url [String]
    # @return [String] result of Normalizer.normalize for the URL
    def self.normalize(url)
      Normalizer.normalize(url)
    end

    # Downloads the page and returns its HTML title. Any StandardError is
    # logged and results in nil, so a failed lookup never blocks saving.
    def self.fetch_title(url)
      URI.open(url) { |page| Nokogiri::HTML(page.read).title }
    rescue => ex
      Turl.logger.error "Error when detecting title for #{url}: #{ex}"
      nil
    end
    private_class_method :fetch_title
  end
end
@@ -0,0 +1,45 @@
1
module Turl
  # Resolves a URL to the address it finally redirects to, recording every
  # URL seen on the way in UrlNormalization so later lookups skip the network.
  module Normalizer
    extend self

    # @param url [String] URL to resolve
    # @return [String] the last URL of the redirect chain (or the stored
    #   result when the URL has been resolved before)
    def normalize(url)
      visited = []
      normalize_internal(url, path: visited).tap do |result|
        # Every URL seen on the way maps to the same final result.
        visited.each do |before|
          record = UrlNormalization.find_or_initialize_by(original_url: before)
          record.update!(normalized_url: result)
        end
      end
    end

    # Follows one hop of the chain and recurses. Stops on a redirect loop,
    # once more than 30 URLs have been visited, on a stored result, or when
    # the response does not point anywhere else.
    private def normalize_internal(url, path:)
      return url if path.include?(url) || path.size > 30

      path << url

      cached = UrlNormalization.find_by(original_url: url)
      return cached.normalized_url if cached

      parsed = URI.parse(url)
      resp = head(parsed)
      target =
        if parsed.host == 'htn.to'
          # For htn.to only the x-redirect-to header is consulted.
          resp['x-redirect-to']
        elsif resp.is_a?(Net::HTTPRedirection)
          resp['location']
        end
      return url unless target

      normalize_internal(absolute_url(url, target), path: path)
    end

    # A Location header may be relative ("/path", "//host/path"). Following
    # it verbatim produced a URI without a host and a failing request, so
    # resolve it against the URL that issued the redirect. Absolute targets
    # are returned untouched to keep the stored strings unchanged.
    private def absolute_url(base, target)
      return target if URI.parse(target).absolute?

      URI.join(base, target).to_s
    end

    # Sends a HEAD request for the URI and returns the response.
    private def head(uri)
      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = true if uri.scheme == 'https'
      http.request_head(request_path(uri))
    end

    # Builds the request target. An empty path becomes "/", also when a query
    # is present (the previous code produced the invalid target "?query").
    private def request_path(uri)
      target = uri.path.to_s.empty? ? '/' : uri.path
      uri.query ? "#{target}?#{uri.query}" : target
    end
  end
end
@@ -0,0 +1,17 @@
1
module Turl
  # A collected tweet that contained at least one URL.
  class Tweet < ApplicationRecord
    belongs_to :twitter_user
    # `has_many :links, through: :tweet_links` needs the join association to
    # be declared on this model as well; without it, using `links` raises
    # because the :tweet_links association cannot be found.
    has_many :tweet_links
    has_many :links, through: :tweet_links

    # Creates or refreshes the tweet (and its author) from an API response.
    #
    # @param tweet_resp [#id, #text, #created_at, #user] tweet from the API
    # @return [Turl::Tweet] the persisted tweet
    # @raise [ActiveRecord::RecordInvalid] when the record cannot be saved
    def self.from_response!(tweet_resp)
      user = TwitterUser.from_response!(tweet_resp.user)
      find_or_initialize_by(twitter_id: tweet_resp.id).tap do |t|
        t.update!(
          content: tweet_resp.text,
          twitter_user: user,
          tweeted_at: tweet_resp.created_at,
        )
      end
    end
  end
end
@@ -0,0 +1,6 @@
1
module Turl
  # Join model connecting one tweet with one link.
  class TweetLink < ApplicationRecord
    belongs_to :tweet
    belongs_to :link
  end
end
@@ -0,0 +1,11 @@
1
module Turl
  # Author of one or more collected tweets.
  class TwitterUser < ApplicationRecord
    has_many :tweets

    # Creates the user on first sight, otherwise refreshes the screen name.
    #
    # @param resp [#id, #screen_name] user object from the API
    # @return [Turl::TwitterUser] the persisted user
    def self.from_response!(resp)
      user = find_or_initialize_by(twitter_id: resp.id)
      user.update!(screen_name: resp.screen_name)
      user
    end
  end
end
@@ -0,0 +1,4 @@
1
module Turl
  # Stored mapping from an original URL to its normalized form; written and
  # read by Turl::Normalizer.
  class UrlNormalization < ApplicationRecord; end
end
@@ -0,0 +1,3 @@
1
module Turl
  # Gem version. Frozen so the constant cannot be mutated in place.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,10 @@
1
<%# Lists every entry of the `links` local as an anchor showing its title and URL. %>
<ul>
  <% links.each do |link| %>
    <li>
      <a href="<%= link.normalized_url %>">
        <%= link.title %>
        <br />
        <small><%= link.normalized_url %></small>
      </a>
    </li>
  <% end %>
</ul>
@@ -0,0 +1,23 @@
1
+ require 'sinatra/base'
2
+
3
module Turl
  # Small web front end that lists recently updated links.
  class Web
    # Prepares the database and starts the web application.
    #
    # @param argv [Array<String>] command-line arguments (currently unused)
    def self.run(argv)
      Turl.prepare_database!
      new.run(argv)
    end

    # Starts the Sinatra application.
    #
    # @param argv [Array<String>] command-line arguments (currently unused)
    def run(argv)
      App.run!
    end

    # Sinatra application; ERB output is HTML-escaped.
    class App < Sinatra::Base
      set :erb, escape_html: true

      # Links updated within the last week, most recently updated first.
      get '/' do
        recent_links = Link.where('updated_at > ?', 1.week.ago).order(updated_at: :desc)
        erb :'root.html', locals: { links: recent_links }
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ require_relative 'lib/turl/version'
2
+
3
+ Gem::Specification.new do |spec|
4
+ spec.name = "turl"
5
+ spec.version = Turl::VERSION
6
+ spec.authors = ["Masataka Pocke Kuwabara"]
7
+ spec.email = ["kuwabara@pocke.me"]
8
+
9
+ spec.summary = %q{}
10
+ spec.description = %q{}
11
+ spec.homepage = "https://github.com/pocke/turl"
12
+ spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
13
+
14
+ spec.metadata["homepage_uri"] = spec.homepage
15
+ spec.metadata["source_code_uri"] = spec.homepage
16
+ # spec.metadata["changelog_uri"] = "TODO: Put your gem's CHANGELOG.md URL here."
17
+
18
+ # Specify which files should be added to the gem when it is released.
19
+ # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
20
+ spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
21
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
22
+ end
23
+ spec.bindir = "exe"
24
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
25
+ spec.require_paths = ["lib"]
26
+
27
+ spec.add_runtime_dependency 'twitter'
28
+ spec.add_runtime_dependency 'activerecord'
29
+ spec.add_runtime_dependency 'sqlite3'
30
+ spec.add_runtime_dependency 'nokogiri'
31
+ spec.add_runtime_dependency 'sinatra'
32
+ spec.add_runtime_dependency 'erubi'
33
+ end
metadata ADDED
@@ -0,0 +1,151 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: turl
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Masataka Pocke Kuwabara
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2020-04-29 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: twitter
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: activerecord
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: sqlite3
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: nokogiri
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: sinatra
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: erubi
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: ''
98
+ email:
99
+ - kuwabara@pocke.me
100
+ executables:
101
+ - turl-collect
102
+ - turl-web
103
+ extensions: []
104
+ extra_rdoc_files: []
105
+ files:
106
+ - ".gitignore"
107
+ - Gemfile
108
+ - README.md
109
+ - Rakefile
110
+ - bin/console
111
+ - bin/setup
112
+ - exe/turl-collect
113
+ - exe/turl-web
114
+ - lib/turl.rb
115
+ - lib/turl/application_record.rb
116
+ - lib/turl/collect.rb
117
+ - lib/turl/link.rb
118
+ - lib/turl/normalizer.rb
119
+ - lib/turl/tweet.rb
120
+ - lib/turl/tweet_link.rb
121
+ - lib/turl/twitter_user.rb
122
+ - lib/turl/url_normalization.rb
123
+ - lib/turl/version.rb
124
+ - lib/turl/views/root.html.erb
125
+ - lib/turl/web.rb
126
+ - turl.gemspec
127
+ homepage: https://github.com/pocke/turl
128
+ licenses: []
129
+ metadata:
130
+ homepage_uri: https://github.com/pocke/turl
131
+ source_code_uri: https://github.com/pocke/turl
132
+ post_install_message:
133
+ rdoc_options: []
134
+ require_paths:
135
+ - lib
136
+ required_ruby_version: !ruby/object:Gem::Requirement
137
+ requirements:
138
+ - - ">="
139
+ - !ruby/object:Gem::Version
140
+ version: 2.3.0
141
+ required_rubygems_version: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ requirements: []
147
+ rubygems_version: 3.1.2
148
+ signing_key:
149
+ specification_version: 4
150
+ summary: ''
151
+ test_files: []