archive_today 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ad131a62edbb1b182f34c8beccaff07cb2e6f73414cd7391c8c8487bfe5ba3b4
4
- data.tar.gz: a30538c8a90aeec34e003c51de6efcefc4f2b8985a3c66c8bb47355ce2fcbd28
3
+ metadata.gz: d37b8ae11abd0b7a09160e3dc7fa47b7e395a9e9b0ee56c6a0c0f6d686e06dd4
4
+ data.tar.gz: 88d084d91aeaefb0033e105ee23348f55335acbf92dc8bf4de07f9efcb894a1d
5
5
  SHA512:
6
- metadata.gz: 92b10f61076a7b1e2c44b93bdad340ed71a5be366b3766f00743e33eec0ca4b3515758fda76a3ee6c140006528cbfd9dc97837bb06a8f466d72ac9e671d82a38
7
- data.tar.gz: fb4d2613c9417dcf16cfce535596574f367a8da48ad25dda967667d918b5ad32c49585de8b32eb1072988de8693e321d85995366760b6215724e1448f5ba7a7b
6
+ metadata.gz: 16a9e59b313e65a34d06657ee3a450ce8fe6e03c8f1f1ceb436db7ebce3fe5fd85b6a30f47818b49c2872bbe6cf641456d14d5d6e6e5323e1d908055abde6ca3
7
+ data.tar.gz: a484ae8056b1e643b0e2852ac25d79ad8e97597b8920cf9fb43947ddf9981fb94fbc4e773e7370c26d84bbf7543ffc31e92841e9aaf08adf5e5f5d7d75230a0e
@@ -1,3 +1,12 @@
1
+ # 0.2.0
2
+
3
+ Also retrieve screenshot URL if available
4
+
5
+ ## Breaking Changes
6
+
7
+ - Method renamed from `#submit` to `#capture`
8
+ - Return type is now a hash with keys `:url` and `:screenshot_url`
9
+
1
10
  # 0.1.0
2
11
 
3
12
  Initial commit
data/Gemfile CHANGED
@@ -1,9 +1,12 @@
1
1
  source 'https://rubygems.org'
2
2
 
3
+ group :development, :test do
4
+ gem 'amazing_print'
5
+ gem 'httplog'
6
+ gem 'irbtools'
7
+ gem 'pry'
8
+ gem 'rake'
9
+ end
10
+
3
11
  # Specify your gem's dependencies in archive_today.gemspec
4
12
  gemspec
5
-
6
- gem 'faraday', '~> 1.0'
7
- gem 'faraday_middleware', '~> 1.0'
8
- gem 'nokogiri', '~> 1.10'
9
- gem 'rake', '~> 12.0'
@@ -1,30 +1,102 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- archive_today (0.1.0)
4
+ archive_today (0.2.0)
5
+ faraday (~> 1.0)
6
+ faraday_middleware (~> 1.0)
7
+ nokogiri (~> 1.10)
8
+ rake (~> 12.0)
5
9
 
6
10
  GEM
7
11
  remote: https://rubygems.org/
8
12
  specs:
13
+ alias (0.2.3)
14
+ amazing_print (1.2.1)
15
+ binding.repl (3.0.0)
16
+ boson (1.3.0)
17
+ boson-more (0.3.1)
18
+ boson (>= 1.3.0)
19
+ cd (1.0.1)
20
+ clipboard (1.0.6)
21
+ coderay (1.1.2)
22
+ debugging (1.1.1)
23
+ binding.repl (~> 3.0)
24
+ paint (>= 0.9, < 3.0)
25
+ every_day_irb (2.1.0)
26
+ cd (~> 1.0)
27
+ fancy_irb (1.2.1)
28
+ paint (>= 0.9, < 3.0)
29
+ unicode-display_width (~> 1.1)
9
30
  faraday (1.0.1)
10
31
  multipart-post (>= 1.2, < 3)
11
32
  faraday_middleware (1.0.0)
12
33
  faraday (~> 1.0)
34
+ ffi (1.13.1)
35
+ g (1.7.2)
36
+ hirb (0.7.3)
37
+ httplog (1.4.2)
38
+ rack (>= 1.0)
39
+ rainbow (>= 2.0.0)
40
+ interactive_editor (0.0.11)
41
+ spoon (>= 0.0.1)
42
+ irbtools (1.7.1)
43
+ alias (~> 0.2.3)
44
+ binding.repl (~> 3.0)
45
+ boson (~> 1.3.0)
46
+ boson-more (~> 0.3.0)
47
+ clipboard (~> 1.0.5)
48
+ coderay (~> 1.1.0)
49
+ debugging (~> 1.0)
50
+ every_day_irb (>= 1.7.1)
51
+ fancy_irb (>= 0.7.3)
52
+ g (>= 1.7.2)
53
+ hirb (~> 0.7, >= 0.7.3)
54
+ interactive_editor (>= 0.0.10)
55
+ method_locator (>= 0.0.4)
56
+ method_source (>= 0.8.2)
57
+ methodfinder (~> 2.0)
58
+ ori (~> 0.1.0)
59
+ os (~> 0.9)
60
+ paint (>= 0.8.7)
61
+ ruby_engine (~> 1.0)
62
+ ruby_info (~> 1.0)
63
+ ruby_version (~> 1.0)
64
+ wirb (>= 1.0.3)
65
+ method_locator (0.0.4)
66
+ method_source (1.0.0)
67
+ methodfinder (2.2.1)
13
68
  mini_portile2 (2.4.0)
14
69
  multipart-post (2.1.1)
15
70
  nokogiri (1.10.10)
16
71
  mini_portile2 (~> 2.4.0)
72
+ ori (0.1.0)
73
+ os (0.9.6)
74
+ paint (2.2.0)
75
+ pry (0.13.1)
76
+ coderay (~> 1.1)
77
+ method_source (~> 1.0)
78
+ rack (2.2.2)
79
+ rainbow (3.0.0)
17
80
  rake (12.3.2)
81
+ ruby_engine (1.0.1)
82
+ ruby_info (1.0.1)
83
+ ruby_version (1.0.2)
84
+ spoon (0.0.6)
85
+ ffi
86
+ unicode-display_width (1.7.0)
87
+ wirb (2.2.1)
88
+ paint (>= 0.9, < 3.0)
18
89
 
19
90
  PLATFORMS
20
91
  ruby
21
92
 
22
93
  DEPENDENCIES
94
+ amazing_print
23
95
  archive_today!
24
- faraday (~> 1.0)
25
- faraday_middleware (~> 1.0)
26
- nokogiri (~> 1.10)
27
- rake (~> 12.0)
96
+ httplog
97
+ irbtools
98
+ pry
99
+ rake
28
100
 
29
101
  BUNDLED WITH
30
102
  2.1.4
data/README.md CHANGED
@@ -20,18 +20,51 @@ Or install it yourself as:
20
20
 
21
21
  ## Usage
22
22
 
23
+ ### Class Method
24
+
25
+ `ArchiveToday.capture`
26
+
27
+ Returns a Hash with keys `:url` and `:screenshot_url`. Note that if the page is in the process of being archived, the screenshot is not generated yet, so `nil` is returned for the screenshot URL.
28
+
29
+ #### Args
30
+
31
+ - `url` (required) - the target URL for archival
32
+ - `debug` (optional) - when set to true, this will log HTTP requests and responses
33
+
34
+ ```ruby
35
+ require 'archive_today'
36
+
37
+ ArchiveToday.capture(url: 'https://example.com')
38
+
39
+ # => { url: 'https://archive.is/a1b2c3', screenshot_url: 'https://archive.is/[...].jpg' }
40
+ ```
41
+
42
+ ### Instance Method
43
+
44
+ `ArchiveToday::Archiver`
45
+
46
+ This class exposes the same `#capture` method, but you can also query the instance for the cached URLs once the capture response is received.
47
+
48
+ #### Args
49
+
50
+ - `url` (required) - the target URL for archival
51
+ - `debug` (optional) - when set to true, this will log HTTP requests and responses
52
+
53
+
23
54
  ```ruby
24
55
  require 'archive_today'
25
56
 
26
- ArchiveToday.submit(url: 'https://example.com')
57
+ a = ArchiveToday::Archiver.new(url: 'https://example.com')
58
+ a.capture
59
+
60
+ puts a.screenshot_url
27
61
 
28
- # => 'https://archive.is/a1b2c3
62
+ # => 'https://archive.is/[...].jpg'
29
63
  ```
30
64
 
31
65
  ## Roadmap
32
66
 
33
67
  - proxies
34
- - optionally return URL of captured screenshot
35
68
 
36
69
  ## Development
37
70
 
@@ -12,12 +12,15 @@ Gem::Specification.new do |spec|
12
12
  spec.license = "MIT"
13
13
  spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
14
14
 
15
+ spec.add_dependency 'faraday', '~> 1.0'
16
+ spec.add_dependency 'faraday_middleware', '~> 1.0'
17
+ spec.add_dependency 'nokogiri', '~> 1.10'
18
+ spec.add_dependency 'rake', '~> 12.0'
19
+
15
20
  spec.metadata["homepage_uri"] = spec.homepage
16
21
  spec.metadata["source_code_uri"] = spec.homepage
17
- spec.metadata["changelog_uri"] = "#{spec.homepage}/CHANGELOG.md"
22
+ spec.metadata["changelog_uri"] = "#{spec.homepage}/blob/master/CHANGELOG.md"
18
23
 
19
- # Specify which files should be added to the gem when it is released.
20
- # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
21
24
  spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
22
25
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
23
26
  end
@@ -1,3 +1,6 @@
1
+ require 'faraday'
2
+ require 'faraday_middleware'
3
+ require 'nokogiri'
1
4
  require 'archive_today/version'
2
5
  require 'archive_today/archiver'
3
6
 
@@ -5,8 +8,8 @@ module ArchiveToday
5
8
  class Error < StandardError; end
6
9
 
7
10
  class << self
8
- def submit(url:, debug: false)
9
- Archiver.new(url: url, debug: debug).submit
11
+ def capture(url:, debug: false)
12
+ Archiver.new(url: url, debug: debug).capture
10
13
  end
11
14
  end
12
15
  end
@@ -1,6 +1,3 @@
1
- require 'faraday'
2
- require 'faraday_middleware'
3
- require 'nokogiri'
4
1
  require_relative 'version'
5
2
 
6
3
  module ArchiveToday
@@ -8,41 +5,69 @@ module ArchiveToday
8
5
  BASE_URL = 'https://archive.today/'.freeze
9
6
  DEFAULT_USER_AGENT = "archive_today #{ArchiveToday::VERSION}".freeze
10
7
 
11
- attr_reader :debug, :url, :user_agent
8
+ attr_accessor :response
9
+ attr_reader :debug, :target_url, :user_agent
12
10
 
13
11
  def initialize(url:, user_agent: DEFAULT_USER_AGENT, debug: false)
14
12
  @debug = debug
15
- @url = url
13
+ @target_url = url
16
14
  @user_agent = user_agent
17
15
  end
18
16
 
19
- def submit
20
- puts 'Submitting URL ...'
17
+ def capture
18
+ puts 'Submitting URL ...' if debug
21
19
  response = connection.post('/submit/') do |req|
22
20
  req.body = submission_body
23
21
  end
24
22
  raise unless response.success?
25
23
 
26
- handle_response(response)
24
+ self.response = response
25
+
26
+ {
27
+ url: finalized_url,
28
+ screenshot_url: screenshot_url
29
+ }
27
30
  end
28
31
 
29
32
  private
30
33
 
31
- def handle_response(response)
32
- headers = response.headers
34
+ def finalized_url
35
+ archived_url.gsub('/wip', '')
36
+ end
37
+
38
+ def archived_url
39
+ @archived_url ||= begin
40
+ headers = response.headers
33
41
 
34
- return headers[:location] if headers.has_key?('location')
35
- return headers[:refresh].split(';url=').last if headers.has_key?('refresh')
42
+ return headers[:location] if headers.has_key?('location')
43
+ return headers[:refresh].split(';url=').last if headers.has_key?('refresh')
44
+
45
+ # TODO: handle the history case mentioned here?
46
+ # https://github.com/pastpages/archiveis/blob/master/archiveis/api.py#L81
47
+ response.env.url
48
+ end
49
+ end
36
50
 
37
- # TODO: handle the history case mentioned here?
38
- # https://github.com/pastpages/archiveis/blob/master/archiveis/api.py#L81
39
- response.env.url
51
+ def screenshot_url
52
+ return nil unless archived_url
53
+ return nil if archived_url.include? '/wip/'
54
+
55
+ response = connection.get do |req|
56
+ req.url "#{archived_url}/image"
57
+ end
58
+ html = Nokogiri::HTML(response.body)
59
+ node = html.at_css('img[itemprop="contentUrl"]')
60
+ url = node.attr('src')
61
+ puts "Got screenshot URL: #{url}" if debug && url
62
+ return url if url
63
+
64
+ nil
40
65
  end
41
66
 
42
67
  def submission_body
43
68
  URI.encode_www_form(
44
69
  {
45
- url: url,
70
+ url: target_url,
46
71
  anyway: 1,
47
72
  submitid: unique_submission_id
48
73
  }
@@ -50,14 +75,15 @@ module ArchiveToday
50
75
  end
51
76
 
52
77
  def unique_submission_id
53
- puts 'Getting unique submission ID ...'
78
+ puts 'Getting unique submission ID ...' if debug
54
79
  response = connection.get('/')
55
80
  raise unless response.success?
56
81
 
57
82
  html = Nokogiri::HTML(response.body)
58
83
  node = html.at_css('input[name="submitid"]')
59
84
  id = node.attr('value')
60
- puts "Got ID: #{id}" and return if id
85
+ puts "Got ID: #{id}" if debug && id
86
+ return id if id
61
87
 
62
88
  nil
63
89
  end
@@ -1,3 +1,3 @@
1
1
  module ArchiveToday
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: archive_today
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - tomholford
@@ -9,7 +9,63 @@ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
11
  date: 2020-07-13 00:00:00.000000000 Z
12
- dependencies: []
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: faraday
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: faraday_middleware
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.10'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.10'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '12.0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '12.0'
13
69
  description: Submit a URL to the Archive.today service to preserve its contents in
14
70
  a Memento-compatible format
15
71
  email:
@@ -37,7 +93,7 @@ licenses:
37
93
  metadata:
38
94
  homepage_uri: https://github.com/tomholford/archive-today
39
95
  source_code_uri: https://github.com/tomholford/archive-today
40
- changelog_uri: https://github.com/tomholford/archive-today/CHANGELOG.md
96
+ changelog_uri: https://github.com/tomholford/archive-today/blob/master/CHANGELOG.md
41
97
  post_install_message:
42
98
  rdoc_options: []
43
99
  require_paths: