gogdb 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +17 -0
- data/README.md +12 -0
- data/bin/gogdb +4 -0
- data/lib/gogdb.rb +16 -0
- data/lib/gogdb/cli.rb +27 -0
- data/lib/gogdb/engine.rb +88 -0
- data/lib/gogdb/logger.rb +28 -0
- data/lib/gogdb/utils.rb +30 -0
- data/lib/gogdb/version.rb +3 -0
- metadata +166 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 82cfcbddbebad876b49fd5dd07b98524daefdce5
|
4
|
+
data.tar.gz: 1e1c39defc8346d307931e31dd453bff05011171
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: aad7dc8e62fe1867dfeda1d8505c0dc5156e88defa212916e2ff664bc2dc2461af0ad00a01793e6c5d0a67af04bbc98b0ac907a78ce3407a7d9cd2b71d13f57a
|
7
|
+
data.tar.gz: c3d4d03217dd48daee67bcd48eb52f435c8adcaccfe2c44a3b9910006be50300f24c0b7c83661af8d22a57a48bb064fadd6595a4299ac6d6a0a949664ba6934d
|
data/LICENSE
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
Copyright (c) 2014 Rolandas Barysas
|
2
|
+
|
3
|
+
This software is provided 'as-is', without any express or implied
|
4
|
+
warranty. In no event will the authors be held liable for any damages
|
5
|
+
arising from the use of this software.
|
6
|
+
|
7
|
+
Permission is granted to anyone to use this software for any purpose,
|
8
|
+
including commercial applications, and to alter it and redistribute it
|
9
|
+
freely, subject to the following restrictions:
|
10
|
+
|
11
|
+
1. The origin of this software must not be misrepresented; you must not
|
12
|
+
claim that you wrote the original software. If you use this software
|
13
|
+
in a product, an acknowledgment in the product documentation would be
|
14
|
+
appreciated but is not required.
|
15
|
+
2. Altered source versions must be plainly marked as such, and must not be
|
16
|
+
misrepresented as being the original software.
|
17
|
+
3. This notice may not be removed or altered from any source distribution.
|
data/README.md
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
gogdb [![Build Status](https://travis-ci.org/rbrs/gogdb.svg?branch=develop)](https://travis-ci.org/rbrs/gogdb)
|
2
|
+
=============
|
3
|
+
|
4
|
+
gogdb is a web crawler for [gog.com](http://gog.com).
|
5
|
+
|
6
|
+
The purpose of this application is to gather data from gog.com and continuously update a database with the changes. It is being developed as the backend for [gogdb.net](https://github.com/rbrs/gogdb.net).
|
7
|
+
|
8
|
+
**This gem is currently in development and is not ready for general use.**
|
9
|
+
|
10
|
+
### License
|
11
|
+
|
12
|
+
This code is free software; you can redistribute it and/or modify it under the terms of the zlib License. A copy of this license can be found in the included LICENSE file.
|
data/bin/gogdb
ADDED
data/lib/gogdb.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'thor'
|
2
|
+
require 'colorize'
|
3
|
+
require 'net/http'
|
4
|
+
require 'net/ping'
|
5
|
+
require 'json'
|
6
|
+
require 'gogdb/version'
|
7
|
+
require 'gogdb/engine'
|
8
|
+
require 'gogdb/utils'
|
9
|
+
require 'gogdb/logger'
|
10
|
+
require 'gogdb/cli'
|
11
|
+
|
12
|
+
# Top-level namespace of the gem. Holds the gog.com addresses the crawler
# requests.
module Gogdb
  # Site root. Relative product paths are appended to it.
  GOG_URL = "http://www.gog.com".freeze

  # Games catalogue page.
  GOG_GAMES_URL = "#{GOG_URL}/games".freeze

  # Movies catalogue page.
  GOG_MOVIES_URL = "#{GOG_URL}/movies".freeze
end
|
data/lib/gogdb/cli.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
module Gogdb
  # Command line front end of the gem, built on Thor.
  class Cli < Thor
    package_name "Gogdb"

    # NOTE(review): no `list` command is defined in this class, so `-L`
    # currently points at nothing. Confirm the intended target.
    map "-L" => :list

    desc "version", "Shows current version"
    # Prints the gem version.
    def version
      puts Gogdb::VERSION
    end

    desc "sync [options]", "Syncs all data between databases"
    # Placeholder: the command is declared but has no implementation yet.
    def sync; end

    desc "fetch [options]", "Fetches items from gog.com and updates database"
    method_option :limit, type: :numeric, default: 0, desc: "How many items to fetch [0 - no limit]"
    method_option :type, type: :string, default: "all", enum: %w[all games movies], desc: "Type items to fetch"
    method_option :debug, type: :boolean, default: false, desc: "Show debug messages"
    method_option :silent, type: :boolean, default: false, desc: "Hide all output"
    # Builds an engine from the parsed command line options and starts a
    # fetch run with those same options.
    def fetch
      @e = Gogdb::Engine.new(options)
      @e.fetch(options)
    end
  end
end
|
data/lib/gogdb/engine.rb
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
module Gogdb
  # Crawler core: downloads gog.com pages and reads the data object that is
  # embedded in them as JavaScript.
  class Engine
    # Captures the value assigned to the global `gogData` JavaScript variable.
    GOG_DATA_PATTERN = /(?<=var gogData = )(.*)(?=;)/

    # Seconds to wait before re-requesting a page when the site answers pings
    # but the page could not be downloaded or parsed.
    RETRY_DELAY = 15

    # @param [Hash] options Logger options (:debug, :silent)
    def initialize(options={})
      @logger = Logger.new(options)
      @utils = Utils.new(@logger)
    end

    # Downloads catalogue pages and the data of every product listed on them.
    #
    # @param [Hash] options Additional options:
    #   :type  - "games", "movies" or "all" (a missing type is treated as "all")
    #   :limit - maximum number of products to download; 0 or nil means no limit
    # @return [Integer] number of products downloaded
    # @raise [ArgumentError] if :type is not one of the values above
    def fetch(options)
      limit = options[:limit].to_i
      fetched = 0

      catch :limitReached do
        catalogue_urls(options[:type]).each do |url|
          pages_number = get_pages_number(url).to_i
          @logger.debug "Pages received: #{pages_number}"
          next if pages_number < 1 # Nothing to do if there are no pages

          # NOTE(review): the page index is never sent with the request, so
          # every iteration downloads the same listing again. The paging
          # parameter gog.com expects is not visible here - confirm and add it.
          pages_number.times do
            products = get_data(url)['products'] || []

            products.each do |product|
              item_data = get_data("#{GOG_URL}#{product['url']}")['gameProductData']
              @logger.debug "Item received: #{item_data && item_data['title']}"

              fetched += 1
              # Leave both loops at once as soon as the limit is reached
              throw :limitReached if limit > 0 && fetched >= limit
            end
          end
        end
      end

      fetched
    end

    # Gets pages number.
    #
    # @param [string] url Page URL
    # @return [integer] number of pages, 0 if it cannot be read
    def get_pages_number(url)
      get_data(url)["totalPages"]
    rescue => e
      @logger.error "Cannot retrieve pages number."
      @logger.debug e.message
      0
    end

    # Gets data from source and parses global javascript object.
    #
    # On any failure the site is pinged: if it is unreachable the call blocks
    # in Utils#retryConnection, otherwise it waits RETRY_DELAY seconds. The
    # request is then repeated.
    #
    # NOTE(review): the retry is unbounded - a page that stays reachable but
    # unparseable is requested again every RETRY_DELAY seconds forever.
    #
    # @param [string] url Page URL
    # @return [hash]
    def get_data(url)
      page = Net::HTTP.get(URI(url))
      JSON.parse(page[GOG_DATA_PATTERN, 1])
    rescue => e
      @logger.warning "Cannot retrieve or parse data from gog.com"
      @logger.error e.message

      # In case crawler cannot access gog.com, let's check if gog.com is
      # actually online.
      if Net::Ping::HTTP.new(url).ping?
        @logger.warning "Connection to gog.com established successfully. Retrying..."
        sleep(RETRY_DELAY)
      else
        @logger.warning "Cannot establish connection to gog.com. Retrying..."
        @utils.retryConnection(url)
      end
      retry
    end

    private

    # Maps the :type option to the catalogue pages that have to be crawled.
    # The constants are resolved at call time because they are defined after
    # this file is required.
    def catalogue_urls(type)
      case type.to_s
      when "games" then [GOG_GAMES_URL]
      when "movies" then [GOG_MOVIES_URL]
      when "all", "" then [GOG_GAMES_URL, GOG_MOVIES_URL]
      else raise ArgumentError, "Unknown item type: #{type.inspect}"
      end
    end
  end
end
|
data/lib/gogdb/logger.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
module Gogdb
  # Console logger with coloured, timestamped messages.
  class Logger
    # @param [Hash] options
    #   :debug  - print debug messages (default false)
    #   :silent - suppress all output, debug messages included (default false)
    def initialize(options={})
      @debug = options[:debug] || false
      @silent = options[:silent] || false
    end

    # Prints the message as is, without timestamp or colour.
    def log(message)
      puts message unless @silent
    end

    # Prints a timestamped message in light red.
    def error(message)
      emit(message, :light_red)
    end

    # Prints a timestamped message in light yellow.
    def warning(message)
      emit(message, :light_yellow)
    end

    # Prints a timestamped message in light blue, only in debug mode.
    def debug(message)
      emit(message, :light_blue) if @debug
    end

    # @return [String] current local time as "YYYY-MM-DD HH:MM:SS"
    def time
      Time.now.strftime("%Y-%m-%d %H:%M:%S")
    end

    private

    # Single exit point for timestamped output, so that silent mode is
    # honoured by every level.
    def emit(message, color)
      return if @silent

      puts "[#{time}] #{message}".colorize(color)
    end
  end
end
|
data/lib/gogdb/utils.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
module Gogdb
  # Connection helpers used by the crawler.
  class Utils
    # Seconds added to the waiting time after every failed attempt.
    RETRY_STEP = 10

    # The waiting time stops growing after this many steps (12 * 10 = 120 s).
    MAX_RETRY_STEPS = 12

    # @param [Logger] logger Receiver of warning/error messages
    def initialize(logger=Logger.new({}))
      @logger = logger
    end

    # Retries connection to GOG.com incrementally (every 10n seconds, up to 120)
    # and blocks until the host answers a ping.
    #
    # @param [String] url Address to ping
    # @return [Boolean] true once the connection is established
    def retryConnection(url)
      ph = Net::Ping::HTTP.new(url)
      attempt = 1

      loop do
        if ph.ping?
          @logger.warning "Connection to gog.com established successfully. Retrying previous task..."
          # Explicit return: a bare `true` would only be discarded and the
          # loop would keep running.
          return true
        end

        delay = RETRY_STEP * attempt
        @logger.error "Cannot establish connection. Retrying in #{delay} seconds..."
        sleep(delay)
        attempt += 1 if attempt < MAX_RETRY_STEPS
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,166 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: gogdb
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Rolandas Barysas
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-01-03 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.7'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.7'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: minitest
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '5.5'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '5.5'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: vcr
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '2.9'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '2.9'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: webmock
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '1.20'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '1.20'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: thor
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0.19'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0.19'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: colorize
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0.7'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0.7'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: net-ping
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '1.7'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '1.7'
|
125
|
+
description: The backend part for gogdb.net. Constantly looks for changes on gog.com.
|
126
|
+
email:
|
127
|
+
- r.b@riseup.net
|
128
|
+
executables:
|
129
|
+
- gogdb
|
130
|
+
extensions: []
|
131
|
+
extra_rdoc_files: []
|
132
|
+
files:
|
133
|
+
- LICENSE
|
134
|
+
- README.md
|
135
|
+
- bin/gogdb
|
136
|
+
- lib/gogdb.rb
|
137
|
+
- lib/gogdb/cli.rb
|
138
|
+
- lib/gogdb/engine.rb
|
139
|
+
- lib/gogdb/logger.rb
|
140
|
+
- lib/gogdb/utils.rb
|
141
|
+
- lib/gogdb/version.rb
|
142
|
+
homepage: http://www.gogdb.net
|
143
|
+
licenses:
|
144
|
+
- zlib
|
145
|
+
metadata: {}
|
146
|
+
post_install_message:
|
147
|
+
rdoc_options: []
|
148
|
+
require_paths:
|
149
|
+
- lib
|
150
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
151
|
+
requirements:
|
152
|
+
- - ">="
|
153
|
+
- !ruby/object:Gem::Version
|
154
|
+
version: 2.1.0
|
155
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - ">="
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0'
|
160
|
+
requirements: []
|
161
|
+
rubyforge_project:
|
162
|
+
rubygems_version: 2.4.5
|
163
|
+
signing_key:
|
164
|
+
specification_version: 4
|
165
|
+
summary: Web crawler for gog.com
|
166
|
+
test_files: []
|