extraloop-redis-storage 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. data/README.rdoc +1 -0
  2. data/bin/extraloop +110 -0
  3. metadata +18 -16
data/README.rdoc CHANGED
@@ -51,3 +51,4 @@ The +set_storage+ method can be called with the following arguments:
51
51
  * _model_ A Ruby constant specifying the model to be used for storing the extracted data.
52
52
  * _session_title_ A human readable title for the extracted dataset (optional).
53
53
 
54
+
data/bin/extraloop ADDED
@@ -0,0 +1,110 @@
1
+ #! /usr/bin/env ruby
2
+ $: << File.realpath(File.dirname(File.dirname(__FILE__))) + "/lib/extraloop"
3
+
4
+ require 'thor'
5
+ require 'pry'
6
+ require 'fileutils'
7
+ require 'thor/group'
8
+ require 'redis-storage'
9
+
10
# Thor tasks for listing, deleting and exporting the scraping sessions
# kept in the ExtraLoop Redis datastore.
class DataStoreCommand < Thor

  ExtraLoop::Storage::autoload_models

  # Result of ScrapingSession.all, looked up once when the class body runs.
  @@sessions = ExtraLoop::Storage::ScrapingSession.all
  # Ohm's Redis connection (not referenced by the tasks in this class).
  @@redis = Ohm.redis

  map "l" => :list,
      "d" => :delete,
      "e" => :export

  desc "list [sessions]", "List harvested datasets filtering by session id range (e.g '25..50')"
  # Prints a table (id, title, model name, record count) of the selected
  # sessions to standard output.
  #
  # sessions - optional session id or "first..last" id range; when omitted
  #            every session is listed.
  def list(sessions=nil)
    data = filter(sessions).map do |session|
      model = session.model
      # Sessions without a model show empty model / records cells.
      [session.id, session.title, model && model.name, model && session.records.size]
    end
    $stdout.puts tabularize(%w[id title model records], data)
  end

  desc "delete [sessions]", "Remove datasets by session id or session id range"
  # Deletes the selected sessions together with their records, reports the
  # outcome on standard error and then prints the session list.
  #
  # sessions - session id or "first..last" id range.
  def delete(sessions)
    deleted = 0
    filter(sessions).each do |session|
      # The session is removed first, then each of its records.
      deleted += 1 if session.delete && session.records.each(&:delete)
    end

    # NOTE(review): the counter tracks deleted sessions although the message
    # says "record"; wording kept unchanged for existing users/scripts.
    count = deleted > 0 ? deleted : 'No'
    $stderr.puts "\n => #{count} record#{'s' if deleted > 1} deleted \n\n"
    list
  end

  desc "export [sessions]", "Export datasets by session id or session id range"

  method_option :format, :default => 'json', :type => :string
  method_option :directory, :default => "#{Time.now.to_i}-export", :aliases => "-o"
  method_option :require, :type => :array, :aliases => "-r"

  # Writes one file per selected session into the output directory, named
  # "<id>_<title with whitespace replaced by underscores>.<format>".
  #
  # sessions - session id or "first..last" id range.
  #
  # Options: --format (only 'json' is accepted), --directory/-o (output
  # directory, created when missing), --require/-r (files or directories of
  # Ruby files to load before exporting).
  #
  # Raises DataStoreCommand::Exceptions::FormatNotImplemented for any other
  # format.
  def export(sessions)
    paths = Array(options[:require])
    require_files paths if paths.any?
    format = options[:format]
    dir = options[:directory]

    # The constant must be fully qualified: a bare FormatNotImplemented does
    # not resolve from inside this class and would raise NameError instead.
    unless %w[json].include?(format)
      raise DataStoreCommand::Exceptions::FormatNotImplemented, "export format '#{format}' is not implemented"
    end

    # mkdir_p is a no-op for an existing directory and also creates parents.
    FileUtils.mkdir_p(dir)

    filter(sessions).each do |session|
      # The title is optional, so guard against nil before substituting.
      filename = "#{session.id}_#{session.title.to_s.gsub(/\s/, "_")}"
      data = session.send("to_#{format}")
      File.open("#{dir}/#{filename}.#{format}", "w") { |f| f.write data }
    end
  end

  # override default banner
  def self.banner(task, namespace = true, subcommand = false)
    "datastore#{task.formatted_usage(self, true, subcommand).gsub(/data_store_command/,'')}"
  end

  private

  # Requires every given Ruby file; a directory contributes each "*.rb" file
  # directly inside it.
  #
  # paths - Array of file or directory paths. A leading "./" or "/" and a
  #         trailing "/" are stripped, and files are then required relative
  #         to the current directory ("./<path>"), so absolute paths are
  #         treated as relative ones.
  #
  # Raises DataStoreCommand::Exceptions::FileNotFound when a path is missing.
  def require_files(paths)
    files = paths.flat_map do |path|
      path = path.gsub(/^(\.?\/)|(\/$)$/m, '')
      unless File.exist?(path)
        raise DataStoreCommand::Exceptions::FileNotFound, "cannot find #{path}"
      end
      File.directory?(path) ? Dir["#{path}/*.rb"] : path
    end

    files.each { |file| require "./#{file}" }
  end

  # Resolves a session selector.
  #
  # arg - nil (every session), a single session id, or a "first..last" range
  #       given as a String.
  #
  # Returns @@sessions itself when arg is nil, otherwise an Array with the
  # sessions found for the requested ids (ids without a session are skipped).
  def filter(arg=nil)
    return @@sessions unless arg

    match = /(\d+)\.\.(-?\d+)/.match(arg)
    first, last = match ? match.captures : [arg, arg]
    # Bounds stay Strings; the String range enumerates the ids to look up.
    Array(Range.new(first, last)).map { |sid| @@sessions[sid] }.compact
  end

  # Renders rows as a plain-text table.
  #
  # headers - Array of column titles.
  # data    - Array of rows (Arrays); cells beyond headers.size are dropped.
  #
  # Returns a String: a header line, a dashed separator, then one line per
  # row, with cells left-justified to the widest cell of their column and
  # joined with " | ".
  def tabularize(headers, data)
    rows = [headers] + data.map { |row| row.take(headers.size) }

    # One width per column (exclusive range: indices 0...headers.size).
    max_cell_sizes = (0...headers.size).map do |column|
      rows.map { |row| row[column].to_s.length }.max
    end

    lines = rows.map do |row|
      cells = row.each_with_index.map { |cell, index| cell.to_s.ljust(max_cell_sizes[index]) }
      " " + cells.join(" | ")
    end

    separator = "-" * lines.map(&:size).max
    lines.insert(1, separator).join("\n")
  end
end
100
+
101
# Namespace holding the error classes used by DataStoreCommand.
class DataStoreCommand::Exceptions
  # Intended for export requests whose format is not supported.
  FormatNotImplemented = Class.new(StandardError)
  # Raised when a path passed for requiring does not exist.
  FileNotFound = Class.new(StandardError)
end
105
+
106
# Top-level Thor CLI; registers DataStoreCommand under the "datastore" name.
class ExtraLoopCommand < Thor
  register DataStoreCommand,
           "datastore",
           "datastore <command>",
           "CLI interface to the ExtraLoop Redis Datastore"
end

# Entry point of the executable: hand control over to Thor.
ExtraLoopCommand.start
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: extraloop-redis-storage
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2012-02-26 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: extraloop
16
- requirement: &18201600 !ruby/object:Gem::Requirement
16
+ requirement: &16543420 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 0.0.3
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *18201600
24
+ version_requirements: *16543420
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: ohm
27
- requirement: &18201140 !ruby/object:Gem::Requirement
27
+ requirement: &16542960 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 0.1.3
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *18201140
35
+ version_requirements: *16542960
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: ohm-contrib
38
- requirement: &18200680 !ruby/object:Gem::Requirement
38
+ requirement: &16542460 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: 0.1.2
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *18200680
46
+ version_requirements: *16542460
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: thor
49
- requirement: &18200080 !ruby/object:Gem::Requirement
49
+ requirement: &16541860 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - =
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 0.14.6
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *18200080
57
+ version_requirements: *16541860
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: rspec
60
- requirement: &18199420 !ruby/object:Gem::Requirement
60
+ requirement: &16541180 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 2.7.0
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *18199420
68
+ version_requirements: *16541180
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: rr
71
- requirement: &18198820 !ruby/object:Gem::Requirement
71
+ requirement: &16540580 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ~>
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: 1.0.4
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *18198820
79
+ version_requirements: *16540580
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: pry
82
- requirement: &18187500 !ruby/object:Gem::Requirement
82
+ requirement: &16540100 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ~>
@@ -87,10 +87,11 @@ dependencies:
87
87
  version: 0.9.7.4
88
88
  type: :development
89
89
  prerelease: false
90
- version_requirements: *18187500
90
+ version_requirements: *16540100
91
91
  description: Redis+Ohm based storage for data sets extracted using the ExtraLoop toolkit.
92
92
  email: andrea.giulio.fiore@googlemail.com
93
- executables: []
93
+ executables:
94
+ - extraloop
94
95
  extensions: []
95
96
  extra_rdoc_files: []
96
97
  files:
@@ -110,6 +111,7 @@ files:
110
111
  - spec/record_spec.rb
111
112
  - spec/scraper_base_spec.rb
112
113
  - spec/scraping_session_spec.rb
114
+ - bin/extraloop
113
115
  homepage: http://github.com/afiore/extraloop-redis-storage
114
116
  licenses: []
115
117
  post_install_message: