extraloop-redis-storage 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. data/README.rdoc +1 -0
  2. data/bin/extraloop +110 -0
  3. metadata +18 -16
data/README.rdoc CHANGED
@@ -51,3 +51,4 @@ The +set_storage+ method can be called with the following arguments:
51
51
  * _model_ A Ruby constant specifying the model to be used for storing the extracted data.
52
52
  * _session_title_ A human readable title for the extracted dataset (optional).
53
53
 
54
+
data/bin/extraloop ADDED
@@ -0,0 +1,110 @@
1
+ #! /usr/bin/env ruby
2
+ $: << File.realpath(File.dirname(File.dirname(__FILE__))) + "/lib/extraloop"
3
+
4
+ require 'thor'
5
+ require 'pry'
6
+ require 'fileutils'
7
+ require 'thor/group'
8
+ require 'redis-storage'
9
+
10
# Thor command group exposing the ExtraLoop Redis datastore on the command
# line. It provides three tasks -- +list+, +delete+ and +export+ -- which all
# select scraping sessions through the private +filter+ helper.
class DataStoreCommand < Thor

  ExtraLoop::Storage::autoload_models

  # Export formats accepted by the +export+ task.
  EXPORT_FORMATS = %w[json].freeze

  # Collection of stored scraping sessions; every task reads from it.
  @@sessions = ExtraLoop::Storage::ScrapingSession.all
  # Not read anywhere in this class; kept so load-time behaviour is unchanged.
  @@redis = Ohm.redis

  map "l" => :list,
      "d" => :delete,
      "e" => :export

  desc "list [sessions]", "List harvested datasets filtering by session id range (e.g '25..50')"
  # Prints a table (id, title, model name, record count) of the selected
  # sessions to standard output.
  #
  # @param sessions [String, nil] a session id or an id range such as "25..50";
  #   nil selects every session
  def list(sessions=nil)
    rows = filter(sessions).map do |session|
      model = session.model
      # Sessions without a model show empty model/records cells.
      [session.id, session.title, model && model.name, model && session.records.size]
    end
    $stdout.puts tabularize(%w[id title model records], rows)
  end

  desc "delete [sessions]", "Remove datasets by session id or session id range"
  # Deletes the selected sessions together with their records, reports the
  # number of deleted sessions on standard error and prints the updated list.
  #
  # @param sessions [String] a session id or an id range such as "25..50"
  def delete(sessions)
    deleted = 0
    filter(sessions).each do |session|
      # Records go first so they are never looked up through an already
      # deleted session; the model guard mirrors the one used by +list+.
      session.records.each(&:delete) if session.model
      deleted += 1 if session.delete
    end
    count = deleted > 0 ? deleted : 'No'
    $stderr.puts "\n => #{count} record#{'s' if deleted > 1} deleted \n\n"
    list
  end

  desc "export [sessions]", "Export datasets by session id or session id range"

  method_option :format, :default => 'json', :type => 'string'
  method_option :directory, :default => "#{Time.now.to_i}-export", :aliases => "-o"
  method_option :require, :type => 'array', :aliases => "-r"

  # Writes each selected session to "<directory>/<id>_<title>.<format>".
  #
  # @param sessions [String] a session id or an id range such as "25..50"
  # @raise [DataStoreCommand::Exceptions::FormatNotImplemented] when the
  #   requested format is not listed in EXPORT_FORMATS
  # @raise [DataStoreCommand::Exceptions::FileNotFound] when a path given
  #   through --require does not exist
  def export(sessions)
    paths = Array(options[:require])
    require_files(paths) if paths.any?
    format = options[:format]
    dir = options[:directory]

    unless EXPORT_FORMATS.include?(format)
      raise DataStoreCommand::Exceptions::FormatNotImplemented, "export format not implemented: #{format}"
    end
    # mkdir_p is a no-op for an existing directory and creates missing parents.
    FileUtils.mkdir_p(dir)

    filter(sessions).each do |session|
      # The title is optional, so coerce nil to an empty string before use.
      filename = "#{session.id}_#{session.title.to_s.gsub(/\s/, "_")}"
      # Serialise before opening the file so a failure leaves no empty file.
      data = session.send("to_#{format}")
      File.open("#{dir}/#{filename}.#{format}", "w") { |f| f.write data }
    end
  end

  # override default banner
  def self.banner(task, namespace = true, subcommand = false)
    "datastore#{task.formatted_usage(self, true, subcommand).gsub(/data_store_command/,'')}"
  end

  private

  # Requires every given file; for a directory, requires each "*.rb" file
  # directly inside it. Paths are expanded against the current working
  # directory, so relative and absolute paths are both accepted.
  #
  # @param paths [Array<String>] file or directory paths
  # @raise [DataStoreCommand::Exceptions::FileNotFound] when a path is missing
  def require_files(paths)
    files = paths.flat_map do |path|
      full_path = File.expand_path(path)
      unless File.exist?(full_path)
        raise DataStoreCommand::Exceptions::FileNotFound, "cannot find #{path}"
      end
      # Sorted so the load order does not depend on the filesystem.
      File.directory?(full_path) ? Dir["#{full_path}/*.rb"].sort : full_path
    end

    files.each { |file| require file }
  end

  # Selects sessions by id.
  #
  # @param arg [String, nil] a single id ("12"), an id range ("25..50"), or
  #   nil for the whole collection
  # @return the whole session collection when +arg+ is nil, otherwise an
  #   Array holding the sessions found for the requested ids
  def filter(arg=nil)
    return @@sessions unless arg

    bounds = /(\d+)\.\.(-?\d+)/.match(arg)
    first_id, last_id = bounds ? bounds.captures : [arg, arg]
    # The bounds stay strings, so this is a String range; ids with no stored
    # session yield nil and are dropped.
    (first_id..last_id).map { |sid| @@sessions[sid] }.compact
  end

  # Renders a plain-text table: a header line, a dashed separator, then one
  # line per data row, with every column padded to its widest cell.
  #
  # @param headers [Array] column titles
  # @param data [Array<Array>] rows; cells beyond headers.size are ignored
  # @return [String] the table, lines joined with "\n"
  def tabularize(headers, data)
    rows = [headers] + data.map { |row| row.take(headers.size) }

    widths = headers.each_index.map do |column|
      rows.map { |row| row[column].to_s.length }.max
    end

    lines = rows.map do |row|
      cells = row.each_with_index.map { |cell, column| cell.to_s.ljust(widths[column]) }
      " #{cells.join(" | ")}"
    end

    separator = "-" * lines.map(&:size).max
    lines.insert(1, separator).join("\n")
  end
end
100
+
101
# Namespace holding the error classes used by the DataStoreCommand tasks.
class DataStoreCommand::Exceptions
  # Error signalling an export format that has no implementation.
  FormatNotImplemented = Class.new(StandardError)
  # Error signalling that a file or directory to be required does not exist.
  FileNotFound = Class.new(StandardError)
end
105
+
106
# Top-level CLI class; mounts DataStoreCommand as the "datastore" subcommand.
class ExtraLoopCommand < Thor
  register DataStoreCommand,
           "datastore",
           "datastore <command>",
           "CLI interface to the ExtraLoop Redis Datastore"
end

# Hand the command-line arguments over to Thor for dispatch.
ExtraLoopCommand.start(ARGV)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: extraloop-redis-storage
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2012-02-26 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: extraloop
16
- requirement: &18201600 !ruby/object:Gem::Requirement
16
+ requirement: &16543420 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 0.0.3
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *18201600
24
+ version_requirements: *16543420
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: ohm
27
- requirement: &18201140 !ruby/object:Gem::Requirement
27
+ requirement: &16542960 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 0.1.3
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *18201140
35
+ version_requirements: *16542960
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: ohm-contrib
38
- requirement: &18200680 !ruby/object:Gem::Requirement
38
+ requirement: &16542460 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: 0.1.2
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *18200680
46
+ version_requirements: *16542460
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: thor
49
- requirement: &18200080 !ruby/object:Gem::Requirement
49
+ requirement: &16541860 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - =
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 0.14.6
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *18200080
57
+ version_requirements: *16541860
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: rspec
60
- requirement: &18199420 !ruby/object:Gem::Requirement
60
+ requirement: &16541180 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 2.7.0
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *18199420
68
+ version_requirements: *16541180
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: rr
71
- requirement: &18198820 !ruby/object:Gem::Requirement
71
+ requirement: &16540580 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ~>
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: 1.0.4
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *18198820
79
+ version_requirements: *16540580
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: pry
82
- requirement: &18187500 !ruby/object:Gem::Requirement
82
+ requirement: &16540100 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ~>
@@ -87,10 +87,11 @@ dependencies:
87
87
  version: 0.9.7.4
88
88
  type: :development
89
89
  prerelease: false
90
- version_requirements: *18187500
90
+ version_requirements: *16540100
91
91
  description: Redis+Ohm based storage for data sets extracted using the ExtraLoop toolkit.
92
92
  email: andrea.giulio.fiore@googlemail.com
93
- executables: []
93
+ executables:
94
+ - extraloop
94
95
  extensions: []
95
96
  extra_rdoc_files: []
96
97
  files:
@@ -110,6 +111,7 @@ files:
110
111
  - spec/record_spec.rb
111
112
  - spec/scraper_base_spec.rb
112
113
  - spec/scraping_session_spec.rb
114
+ - bin/extraloop
113
115
  homepage: http://github.com/afiore/extraloop-redis-storage
114
116
  licenses: []
115
117
  post_install_message: