gogdb 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +17 -0
- data/README.md +12 -0
- data/bin/gogdb +4 -0
- data/lib/gogdb.rb +16 -0
- data/lib/gogdb/cli.rb +27 -0
- data/lib/gogdb/engine.rb +88 -0
- data/lib/gogdb/logger.rb +28 -0
- data/lib/gogdb/utils.rb +30 -0
- data/lib/gogdb/version.rb +3 -0
- metadata +166 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 82cfcbddbebad876b49fd5dd07b98524daefdce5
|
4
|
+
data.tar.gz: 1e1c39defc8346d307931e31dd453bff05011171
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: aad7dc8e62fe1867dfeda1d8505c0dc5156e88defa212916e2ff664bc2dc2461af0ad00a01793e6c5d0a67af04bbc98b0ac907a78ce3407a7d9cd2b71d13f57a
|
7
|
+
data.tar.gz: c3d4d03217dd48daee67bcd48eb52f435c8adcaccfe2c44a3b9910006be50300f24c0b7c83661af8d22a57a48bb064fadd6595a4299ac6d6a0a949664ba6934d
|
data/LICENSE
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
Copyright (c) 2014 Rolandas Barysas
|
2
|
+
|
3
|
+
This software is provided 'as-is', without any express or implied
|
4
|
+
warranty. In no event will the authors be held liable for any damages
|
5
|
+
arising from the use of this software.
|
6
|
+
|
7
|
+
Permission is granted to anyone to use this software for any purpose,
|
8
|
+
including commercial applications, and to alter it and redistribute it
|
9
|
+
freely, subject to the following restrictions:
|
10
|
+
|
11
|
+
1. The origin of this software must not be misrepresented; you must not
|
12
|
+
claim that you wrote the original software. If you use this software
|
13
|
+
in a product, an acknowledgment in the product documentation would be
|
14
|
+
appreciated but is not required.
|
15
|
+
2. Altered source versions must be plainly marked as such, and must not be
|
16
|
+
misrepresented as being the original software.
|
17
|
+
3. This notice may not be removed or altered from any source distribution.
|
data/README.md
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
gogdb [Build Status](https://travis-ci.org/rbrs/gogdb)
|
2
|
+
=============
|
3
|
+
|
4
|
+
gogdb is a web crawler for [gog.com](http://www.gog.com).
|
5
|
+
|
6
|
+
The purpose of this application is to gather data from gog.com and keep a database continuously updated with the changes. It is being developed as the backend part of [gogdb.net](http://www.gogdb.net).
|
7
|
+
|
8
|
+
**This gem is currently in development and is not ready for general use.**
|
9
|
+
|
10
|
+
### License
|
11
|
+
|
12
|
+
This code is free software; you can redistribute it and/or modify it under the terms of the zlib License. A copy of this license can be found in the included LICENSE file.
|
data/bin/gogdb
ADDED
data/lib/gogdb.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'thor'
|
2
|
+
require 'colorize'
|
3
|
+
require 'net/http'
|
4
|
+
require 'net/ping'
|
5
|
+
require 'json'
|
6
|
+
require 'gogdb/version'
|
7
|
+
require 'gogdb/engine'
|
8
|
+
require 'gogdb/utils'
|
9
|
+
require 'gogdb/logger'
|
10
|
+
require 'gogdb/cli'
|
11
|
+
|
12
|
+
# Top-level namespace of the gem. Holds the URLs the crawler reads from.
module Gogdb
  # Site root; item paths returned by the catalogue are appended to it.
  GOG_URL        = 'http://www.gog.com'
  # Catalogue listing of games.
  GOG_GAMES_URL  = 'http://www.gog.com/games'
  # Catalogue listing of movies.
  GOG_MOVIES_URL = 'http://www.gog.com/movies'
end
|
data/lib/gogdb/cli.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
module Gogdb
  # Command-line front end, built on Thor. Each public method below is
  # exposed as a sub-command of the `gogdb` executable.
  class Cli < Thor
    package_name 'Gogdb'
    map '-L' => :list

    desc 'version', 'Shows current version'
    # Prints the gem version.
    def version
      puts Gogdb::VERSION
    end

    desc 'sync [options]', 'Syncs all data between databases'
    # Placeholder: the command is declared but has no implementation yet.
    def sync
    end

    desc 'fetch [options]', 'Fetches items from gog.com and updates database'
    method_option :limit,
                  type: :numeric,
                  desc: 'How many items to fetch [0 - no limit]',
                  default: 0
    method_option :type,
                  type: :string,
                  enum: %w[all games movies],
                  desc: 'Type items to fetch',
                  default: 'all'
    method_option :debug,
                  type: :boolean,
                  desc: 'Show debug messages',
                  default: false
    method_option :silent,
                  type: :boolean,
                  desc: 'Hide all output',
                  default: false
    # Builds an Engine from the parsed options and runs a fetch with them.
    def fetch
      @e = Gogdb::Engine.new(options)
      @e.fetch(options)
    end
  end
end
|
data/lib/gogdb/engine.rb
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
module Gogdb
  # Crawler core: walks the catalogue listings and requests every item found.
  class Engine

    # @param [Hash] options Passed to the logger (:debug and :silent are read there)
    def initialize(options={})
      @logger = Logger.new(options)
      @utils = Utils.new(@logger)
    end

    # Downloads data from source and updates database with changes
    #
    # Walks every listing selected by options[:type], requests each listed
    # item and logs its title. Stops as soon as options[:limit] items have
    # been processed (0 or a missing limit means "no limit").
    #
    # @param [Hash] options Additional options such as :limit and :type
    # @return [nil]
    # @raise [ArgumentError] if options[:type] is not "all", "games" or "movies"
    def fetch(options)
      limit = options[:limit].to_i # nil (option not given) counts as "no limit"
      count = 0

      catch :limitReached do
        source_urls(options[:type]).each do |url|
          pages_number = get_pages_number(url).to_i
          @logger.debug "Pages received: #{pages_number}"
          next if pages_number < 1 # Do nothing if there are no pages

          # Loop through pages
          # NOTE(review): the same URL is requested on every iteration, so
          # each pass sees the same listing. The query parameter that selects
          # a page is not known from this code -- confirm and add it.
          pages_number.times do
            # Let's get products and their URLs for the current page
            items_data = Array(get_data(url)['products'])

            # Loop through products and get data of every one
            items_data.each do |item|
              item_data = get_data("#{GOG_URL}#{item['url']}")['gameProductData']
              title = item_data ? item_data['title'] : nil

              @logger.debug "Item received: #{title}"

              count += 1
              # Break out of all loops if limit is reached
              throw :limitReached if limit != 0 && count >= limit
            end
          end
        end
      end

      nil
    end

    # Gets pages number.
    #
    # @param [string] url Page URL
    # @return [integer] number of pages, 0 when it cannot be determined
    def get_pages_number(url)
      # to_i turns a missing "totalPages" entry (nil) into 0
      get_data(url)["totalPages"].to_i
    rescue => e
      @logger.error "Cannot retrieve pages number."
      @logger.debug e.message
      0
    end

    # Gets data from source and parses global javascript object.
    #
    # The page is expected to contain `var gogData = <json>;`. Everything
    # between that prefix and the last `;` on the same line is parsed as JSON.
    #
    # On any failure the request is retried: after 15 seconds when the host
    # answers a ping, otherwise once @utils.retryConnection returns.
    # NOTE(review): the retry is unbounded -- a reachable page that never
    # contains gogData keeps this method looping. Confirm that is intended.
    #
    # @param [string] url Page URL
    # @return [hash]
    def get_data(url)
      page = Net::HTTP.get(URI(url))
      JSON.parse(page[/(?<=var gogData = )(.*)(?=;)/, 1])
    rescue => e
      @logger.warning "Cannot retrieve or parse data from gog.com"
      @logger.error e.message

      # In case crawler cannot access gog.com, let's check if gog.com is
      # actually online.
      if Net::Ping::HTTP.new(url).ping?
        @logger.warning "Connection to gog.com established successfully. Retrying..."
        sleep(15)
      else
        @logger.warning "Cannot establish connection to gog.com. Retrying..."

        # Retry connection until gog.com comes online. After that - retry
        # getting data.
        @utils.retryConnection(url)
      end
      retry
    end

    private

    # Maps the :type option to the listing URLs that have to be crawled.
    def source_urls(type)
      case type.to_s
      when "games"   then [GOG_GAMES_URL]
      when "movies"  then [GOG_MOVIES_URL]
      when "all", "" then [GOG_GAMES_URL, GOG_MOVIES_URL]
      else raise ArgumentError, "Unknown item type: #{type.inspect}"
      end
    end

  end
end
|
data/lib/gogdb/logger.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
module Gogdb
  # Console logger. Messages go to standard output; everything except #log
  # is prefixed with a timestamp and coloured by severity.
  class Logger
    # @param [Hash] options :debug enables #debug output, :silent suppresses
    #   all output; both default to false
    def initialize(options={})
      @debug = options[:debug] || false
      @silent = options[:silent] || false
    end

    # Prints the message as-is (no timestamp, no colour) unless silenced.
    def log(message)
      puts message unless @silent
    end

    # Prints a timestamped message in light red unless silenced.
    def error(message)
      emit(message, :light_red) unless @silent
    end

    # Prints a timestamped message in light yellow unless silenced.
    def warning(message)
      emit(message, :light_yellow) unless @silent
    end

    # Prints a timestamped message in light blue when debugging is enabled.
    # :silent wins over :debug, so a silenced logger prints nothing at all.
    def debug(message)
      emit(message, :light_blue) if @debug && !@silent
    end

    # @return [String] current local time as "YYYY-MM-DD HH:MM:SS"
    def time
      Time.now.strftime("%Y-%m-%d %H:%M:%S")
    end

    private

    # Writes one timestamped line in the given colour.
    def emit(message, color)
      puts "[#{time}] #{message}".colorize(color)
    end
  end
end
|
data/lib/gogdb/utils.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
module Gogdb
  # Helpers shared by the crawler.
  class Utils

    # @param [Logger] logger Receives the progress messages
    def initialize(logger=Logger.new({}))
      @logger = logger
    end

    # Retries connection to GOG.com incrementally (every 10n seconds, up to 120)
    #
    # Blocks until an HTTP ping of the URL succeeds. The wait starts at 10
    # seconds and grows by 10 seconds per failed attempt until it reaches
    # 120 seconds, where it stays.
    #
    # @param [String] url URL to ping
    # @return [Boolean] true once the connection is established
    def retryConnection(url)
      ph = Net::Ping::HTTP.new(url)
      attempt = 1

      loop do
        if ph.ping?
          @logger.warning "Connection to gog.com established successfully. Retrying previous task..."
          # Leave the loop: a bare `true` here would be discarded and the
          # method would keep pinging and sleeping forever.
          return true
        end

        @logger.error "Cannot establish connection. Retrying in #{attempt * 10} seconds..."
        sleep(10 * attempt)
        attempt += 1 if attempt < 12 # 12 * 10 = 120 seconds is the cap
      end
    end

  end
end
|
metadata
ADDED
@@ -0,0 +1,166 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: gogdb
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Rolandas Barysas
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-01-03 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.7'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.7'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: minitest
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '5.5'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '5.5'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: vcr
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '2.9'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '2.9'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: webmock
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '1.20'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '1.20'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: thor
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0.19'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0.19'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: colorize
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0.7'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0.7'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: net-ping
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '1.7'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '1.7'
|
125
|
+
description: The backend part for gogdb.net. Constantly looks for changes on gog.com.
|
126
|
+
email:
|
127
|
+
- r.b@riseup.net
|
128
|
+
executables:
|
129
|
+
- gogdb
|
130
|
+
extensions: []
|
131
|
+
extra_rdoc_files: []
|
132
|
+
files:
|
133
|
+
- LICENSE
|
134
|
+
- README.md
|
135
|
+
- bin/gogdb
|
136
|
+
- lib/gogdb.rb
|
137
|
+
- lib/gogdb/cli.rb
|
138
|
+
- lib/gogdb/engine.rb
|
139
|
+
- lib/gogdb/logger.rb
|
140
|
+
- lib/gogdb/utils.rb
|
141
|
+
- lib/gogdb/version.rb
|
142
|
+
homepage: http://www.gogdb.net
|
143
|
+
licenses:
|
144
|
+
- zlib
|
145
|
+
metadata: {}
|
146
|
+
post_install_message:
|
147
|
+
rdoc_options: []
|
148
|
+
require_paths:
|
149
|
+
- lib
|
150
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
151
|
+
requirements:
|
152
|
+
- - ">="
|
153
|
+
- !ruby/object:Gem::Version
|
154
|
+
version: 2.1.0
|
155
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - ">="
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0'
|
160
|
+
requirements: []
|
161
|
+
rubyforge_project:
|
162
|
+
rubygems_version: 2.4.5
|
163
|
+
signing_key:
|
164
|
+
specification_version: 4
|
165
|
+
summary: Web crawler for gog.com
|
166
|
+
test_files: []
|