extraloop-redis-storage 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +1 -0
- data/bin/extraloop +110 -0
- metadata +18 -16
data/README.rdoc
CHANGED
data/bin/extraloop
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
$: << File.realpath(File.dirname(File.dirname(__FILE__))) + "/lib/extraloop"
|
3
|
+
|
4
|
+
require 'thor'
|
5
|
+
require 'pry'
|
6
|
+
require 'fileutils'
|
7
|
+
require 'thor/group'
|
8
|
+
require 'redis-storage'
|
9
|
+
|
10
|
+
class DataStoreCommand < Thor
|
11
|
+
|
12
|
+
ExtraLoop::Storage::autoload_models
|
13
|
+
|
14
|
+
@@sessions = ExtraLoop::Storage::ScrapingSession.all
|
15
|
+
@@redis = Ohm.redis
|
16
|
+
|
17
|
+
map "l" => :list,
|
18
|
+
"d" => :delete,
|
19
|
+
"e" => :export
|
20
|
+
|
21
|
+
desc "list [sessions]", "List harvested datasets filtering by session id range (e.g '25..50')"
|
22
|
+
# Prints a table (id, title, model name, record count) of the sessions
# selected by #filter to standard output.
#
# sessions - optional session id or id range string (e.g. '25..50');
#            when omitted, whatever #filter returns for nil is listed.
def list(sessions=nil)
  table_rows = filter(sessions).map do |session|
    model = session.model
    # Sessions without a model show empty model/records cells.
    [session.id, session.title, model && model.name, model && session.records.size]
  end

  $stdout.puts tabularize(%w[id title model records], table_rows)
end
|
26
|
+
|
27
|
+
desc "delete [sessions]", "Remove datasets by session id or session id range"
|
28
|
+
# Deletes every session selected by #filter along with its records, writes a
# summary line to standard error, then prints the remaining datasets.
#
# sessions - session id or id range string (e.g. '25..50').
def delete(sessions)
  deleted = 0

  filter(sessions).each do |session|
    # NOTE(review): the session itself is deleted before its records are
    # walked; confirm session.records still resolves after session.delete.
    next unless session.delete
    next unless session.records.each(&:delete)

    deleted += 1
  end

  amount = deleted > 0 ? deleted : 'No'
  plural = deleted > 1 ? 's' : nil
  $stderr.puts "\n => #{amount} record#{plural} deleted \n\n"

  list
end
|
34
|
+
|
35
|
+
desc "export [sessions]", "Export datasets by session id or session id range"
|
36
|
+
|
37
|
+
method_option :format, :default => 'json', :type => 'string'
|
38
|
+
method_option :directory, :default => "#{Time.now.to_i}-export", :aliases => "-o"
|
39
|
+
method_option :require, :type => 'array', :aliases => "-r"
|
40
|
+
|
41
|
+
# Writes each session selected by #filter to "<directory>/<id>_<title>.<format>".
#
# Reads the :require, :format and :directory Thor options. Files or
# directories given via :require are loaded first through #require_files.
#
# sessions - session id or id range string (e.g. '25..50').
#
# Raises DataStoreCommand::Exceptions::FormatNotImplemented when the requested
# format is not in the supported list (currently only 'json').
def export(sessions)
  paths = Array(options[:require])
  require_files(paths) if paths.any?

  format = options[:format]
  dir = options[:directory]

  # The exception class lives under DataStoreCommand::Exceptions, so it has to
  # be fully qualified; a bare FormatNotImplemented is not resolvable here.
  unless %w[json].include?(format)
    raise DataStoreCommand::Exceptions::FormatNotImplemented, "unsupported export format: #{format}"
  end

  # mkdir_p tolerates an existing directory and creates missing parents.
  FileUtils.mkdir_p(dir)

  filter(sessions).each do |session|
    filename = "#{session.id}_#{session.title.gsub(/\s/, "_")}"
    # format was validated against the whitelist above, so this dynamic
    # dispatch can only reach a known to_<format> serializer.
    data = session.send("to_#{format}")
    File.open(File.join(dir, "#{filename}.#{format}"), "w") { |f| f.write data }
  end
end
|
55
|
+
|
56
|
+
# override default banner
|
57
|
+
# Builds the usage line shown in help output: the task's formatted usage,
# prefixed with "datastore" and with the underscored class name removed.
#
# task       - object responding to formatted_usage(klass, namespace, subcommand).
# namespace  - accepted for signature compatibility; the usage is always
#              requested with namespace = true.
# subcommand - forwarded to task.formatted_usage.
def self.banner(task, namespace = true, subcommand = false)
  usage = task.formatted_usage(self, true, subcommand)
  "datastore" + usage.gsub(/data_store_command/, '')
end
|
60
|
+
|
61
|
+
private
|
62
|
+
# Requires every Ruby file named by +paths+.
#
# paths - Array of file or directory paths, absolute or relative to the
#         current working directory. For a directory, each "*.rb" file
#         directly inside it is required (no recursion).
#
# Returns the Array of absolute file paths that were passed to require.
# Raises DataStoreCommand::Exceptions::FileNotFound if a path does not exist.
def require_files(paths)
  files = paths.flat_map do |path|
    # expand_path normalises "./foo", "foo/" and absolute paths alike, so
    # absolute paths are no longer rewritten into cwd-relative ones.
    full_path = File.expand_path(path)

    unless File.exist?(full_path)
      raise DataStoreCommand::Exceptions::FileNotFound, "cannot find #{path}"
    end

    # Sorted so the load order does not depend on the filesystem.
    File.directory?(full_path) ? Dir[File.join(full_path, "*.rb")].sort : full_path
  end

  files.each { |file| require file }
end
|
73
|
+
|
74
|
+
|
75
|
+
# Selects scraping sessions by id.
#
# arg - nil to select everything, a single session id ("25"), or an id range
#       written as "first..last" ("25..50"). Ids are kept as strings, exactly
#       as they were captured from the argument.
#
# Returns @@sessions itself when arg is nil; otherwise an Array of the
# sessions found for the requested ids, with unknown ids skipped.
def filter(arg=nil)
  return @@sessions unless arg

  bounds = /(\d+)\.\.(-?\d+)/.match(arg)
  first, last = bounds ? bounds.captures : [arg, arg]

  # NOTE(review): the pattern accepts a negative upper bound ("3..-1") but
  # nothing here gives it a special meaning — confirm the intended behaviour.
  # One lookup per id; ids with no session come back nil and are dropped.
  (first..last).map { |sid| @@sessions[sid] }.compact
end
|
84
|
+
|
85
|
+
# Renders headers and data rows as a plain-text table.
#
# headers - Array of column titles.
# data    - Array of rows (Arrays); cells beyond headers.size are dropped.
#
# Every cell is converted with to_s and left-justified to the widest cell in
# its column. Each line starts with a single space, cells are joined with
# " | ", and a dashed separator as long as the longest line follows the
# header line.
#
# Returns the table as a single newline-joined String.
def tabularize(headers, data)
  table = [headers] + data.map { |row| row.take(headers.size) }

  # One width per header column (indexes 0...headers.size, not 0..headers.size).
  widths = headers.each_index.map do |col|
    table.map { |row| row[col].to_s.length }.max
  end

  lines = table.map do |row|
    cells = row.each_with_index.map { |cell, col| cell.to_s.ljust(widths[col]) }
    " " + cells.join(" | ")
  end

  separator = "-" * lines.map(&:size).max
  lines.insert(1, separator).join("\n")
end
|
99
|
+
end
|
100
|
+
|
101
|
+
# Container for the error classes used by DataStoreCommand.
class DataStoreCommand::Exceptions
  # Signals an export format outside the supported list.
  FormatNotImplemented = Class.new(StandardError)

  # Signals that a path handed to the CLI for loading does not exist.
  FileNotFound = Class.new(StandardError)
end
|
105
|
+
|
106
|
+
# Entry-point Thor class; registers DataStoreCommand under the "datastore"
# subcommand.
class ExtraLoopCommand < Thor
  register DataStoreCommand,
           "datastore",
           "datastore <command>",
           "CLI interface to the ExtraLoop Redis Datastore"
end
|
109
|
+
|
110
|
+
ExtraLoopCommand.start
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: extraloop-redis-storage
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2012-02-26 00:00:00.000000000Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: extraloop
|
16
|
-
requirement: &
|
16
|
+
requirement: &16543420 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 0.0.3
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *16543420
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: ohm
|
27
|
-
requirement: &
|
27
|
+
requirement: &16542960 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 0.1.3
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *16542960
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: ohm-contrib
|
38
|
-
requirement: &
|
38
|
+
requirement: &16542460 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 0.1.2
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *16542460
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: thor
|
49
|
-
requirement: &
|
49
|
+
requirement: &16541860 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - =
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: 0.14.6
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *16541860
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: rspec
|
60
|
-
requirement: &
|
60
|
+
requirement: &16541180 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: 2.7.0
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *16541180
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: rr
|
71
|
-
requirement: &
|
71
|
+
requirement: &16540580 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ~>
|
@@ -76,10 +76,10 @@ dependencies:
|
|
76
76
|
version: 1.0.4
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *16540580
|
80
80
|
- !ruby/object:Gem::Dependency
|
81
81
|
name: pry
|
82
|
-
requirement: &
|
82
|
+
requirement: &16540100 !ruby/object:Gem::Requirement
|
83
83
|
none: false
|
84
84
|
requirements:
|
85
85
|
- - ~>
|
@@ -87,10 +87,11 @@ dependencies:
|
|
87
87
|
version: 0.9.7.4
|
88
88
|
type: :development
|
89
89
|
prerelease: false
|
90
|
-
version_requirements: *
|
90
|
+
version_requirements: *16540100
|
91
91
|
description: Redis+Ohm based storage for data sets extracted using the ExtraLoop toolkit.
|
92
92
|
email: andrea.giulio.fiore@googlemail.com
|
93
|
-
executables:
|
93
|
+
executables:
|
94
|
+
- extraloop
|
94
95
|
extensions: []
|
95
96
|
extra_rdoc_files: []
|
96
97
|
files:
|
@@ -110,6 +111,7 @@ files:
|
|
110
111
|
- spec/record_spec.rb
|
111
112
|
- spec/scraper_base_spec.rb
|
112
113
|
- spec/scraping_session_spec.rb
|
114
|
+
- bin/extraloop
|
113
115
|
homepage: http://github.com/afiore/extraloop-redis-storage
|
114
116
|
licenses: []
|
115
117
|
post_install_message:
|