datapimp 1.0.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +26 -198
- data/datapimp.gemspec +2 -2
- data/lib/datapimp/cli/config.rb +17 -0
- data/lib/datapimp/cli/list.rb +15 -0
- data/lib/datapimp/cli/support/data_sync.rb +22 -0
- data/lib/datapimp/cli/sync.rb +16 -1
- data/lib/datapimp/clients/dropbox.rb +0 -21
- data/lib/datapimp/sources/google.rb +212 -0
- data/lib/datapimp/sources.rb +198 -0
- data/lib/datapimp/sync/dropbox_folder.rb +31 -4
- data/lib/datapimp/version.rb +1 -1
- data/lib/datapimp.rb +4 -1
- metadata +16 -14
- data/lib/datapimp/data_sources/google.rb +0 -5
- data/lib/datapimp/data_sources.rb +0 -10
- /data/lib/datapimp/{data_sources → sources}/dropbox.rb +0 -0
- /data/lib/datapimp/{data_sources → sources}/excel.rb +0 -0
- /data/lib/datapimp/{data_sources → sources}/github.rb +0 -0
- /data/lib/datapimp/{data_sources → sources}/json.rb +0 -0
- /data/lib/datapimp/{data_sources → sources}/nokogiri.rb +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a1b62d4316928f5d3ffa350d05cb579649fa1a67
|
4
|
+
data.tar.gz: d77cae1fa5c88ac3dedd9711dd8200cc98de1a6f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f373b48034f1ca5382f7e19f81cb86d623fa143ec22d72cc5f924115dae2b137d5d4103771fb471593bdc794a9498f34cba989e5d547a3903b87cfdd4ed24c08
|
7
|
+
data.tar.gz: 3e1df1aef41dfa912ebadde158c70fc8991869527d2b5b6f019739096110aa918f603e01c47e7029e6bdffdec97604b8fd022da7f3e2d5a4b78fbe1937651d6f
|
data/README.md
CHANGED
@@ -1,211 +1,39 @@
|
|
1
|
-
###
|
1
|
+
### Tools for working with common data sources
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
A collection of CLI utilities for pulling both files and structured data
|
4
|
+
from file sharing and collaboration services like Google Drive and
|
5
|
+
Dropbox.
|
5
6
|
|
6
|
-
|
7
|
+
You can use this to pull down Google spreadsheets and convert them to
|
8
|
+
JSON structures, or to convert an Excel spreadsheet on Dropbox to the
|
9
|
+
same.
|
7
10
|
|
8
|
-
|
11
|
+
You can use this to sync the contents of a local folder on your system
|
12
|
+
with the remote contents of a file share on Dropbox or Google Drive.
|
9
13
|
|
10
|
-
|
14
|
+
And much more.
|
11
15
|
|
12
|
-
###
|
16
|
+
### Getting Started
|
13
17
|
|
14
|
-
```
|
15
|
-
|
16
|
-
|
17
|
-
api :my_app => "My Application" do
|
18
|
-
version :v1
|
19
|
-
|
20
|
-
desc "Public users include anyone with access to the URL"
|
21
|
-
policy :public_users do
|
22
|
-
allow :books, :commands => false, :queries => true
|
23
|
-
end
|
24
|
-
|
25
|
-
desc "Authenticated users register and are given an auth token"
|
26
|
-
policy :logged_in_users do
|
27
|
-
authenticate_with :header => 'X-AUTH-TOKEN', :param => :auth_token
|
28
|
-
allow :books, :commands => true, :queries => true
|
29
|
-
end
|
30
|
-
|
31
|
-
desc "Admin users have the admin flag set to true"
|
32
|
-
policy :admin_users do
|
33
|
-
extends :logged_in_users
|
34
|
-
test :admin?
|
35
|
-
end
|
36
|
-
end
|
37
|
-
```
|
38
|
-
|
39
|
-
An API can be inspected:
|
40
|
-
|
41
|
-
```ruby
|
42
|
-
api("My Application").authentication_header #=> "X-AUTH-TOKEN"
|
43
|
-
api("My Application").policies #=> [:public_users, :logged_in_users, :admin_users]
|
44
|
-
api("My Application").policy(:admin_users).resource(:books).allowed_commands #=> [:create, :update, :delete]
|
45
|
-
```
|
46
|
-
|
47
|
-
An API is made up of resources:
|
48
|
-
|
49
|
-
```ruby
|
50
|
-
resource "Books" do
|
51
|
-
serializer do
|
52
|
-
desc "A unique id for the book", :type => :integer
|
53
|
-
attribute :id
|
54
|
-
|
55
|
-
desc "The title of the book", :type => :string
|
56
|
-
attribute :title
|
57
|
-
|
58
|
-
desc "The year the book was published", :type => :integer
|
59
|
-
attribute :year
|
60
|
-
|
61
|
-
desc "A reference to the author", :type => "Author"
|
62
|
-
has_one :author
|
63
|
-
end
|
64
|
-
|
65
|
-
command :update, "Update a book's attributes" do
|
66
|
-
# Will ensure the command is run with
|
67
|
-
# Book.accessible_to(current_user).find(id).
|
68
|
-
scope :accessible_to
|
69
|
-
|
70
|
-
params do
|
71
|
-
duck :id, :method => :to_s
|
72
|
-
|
73
|
-
optional do
|
74
|
-
string :title
|
75
|
-
end
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
query do
|
80
|
-
start_from :scope => :accessible_to
|
81
|
-
|
82
|
-
params do
|
83
|
-
desc "The year the book was published (example: YYYY)"
|
84
|
-
integer :year_published
|
85
|
-
end
|
86
|
-
|
87
|
-
role :admin do
|
88
|
-
start_from :scope => :all
|
89
|
-
end
|
90
|
-
end
|
91
|
-
end
|
18
|
+
```bash
|
19
|
+
gem install datapimp
|
20
|
+
datapimp help
|
92
21
|
```
|
93
22
|
|
94
|
-
|
95
|
-
|
96
|
-
```ruby
|
97
|
-
meta_data = api("My Application").resource("Books").meta_data
|
23
|
+
#### Available Commands
|
98
24
|
|
99
|
-
meta_data.attributes # {:id => "The id of the book", :year_published => "The year it was published"}
|
100
|
-
meta_data.commands => [:update]
|
101
|
-
meta_data.command(:update).arguments #=> [:id, :year, :title]
|
102
|
-
meta_data.command(:update).optional_arguments #=> [:year, :title]
|
103
25
|
```
|
26
|
+
COMMANDS:
|
104
27
|
|
105
|
-
|
106
|
-
|
107
|
-
|
28
|
+
config Shows the configuration options being used
|
29
|
+
config set manipulate configuration settings
|
30
|
+
help Display global or [command] help documentation
|
31
|
+
list spreadsheets list the spreadsheets which can be used as datasources
|
32
|
+
setup amazon setup integration with amazon
|
33
|
+
setup dropbox setup integration with dropbox
|
34
|
+
setup github setup integration with github
|
35
|
+
setup google setup integration with google drive
|
36
|
+
sync data Synchronize the contents of a local data store with its remote source
|
37
|
+
sync folder Synchronize the contents of a local folder with a file sharing service
|
108
38
|
|
109
|
-
### Customizing the elements
|
110
|
-
|
111
|
-
How are each of these behaviors is stored in code? In a way that will be
|
112
|
-
very familiar to Rails developers, following common naming conventions
|
113
|
-
and file organization patterns.
|
114
|
-
|
115
|
-
```
|
116
|
-
- app
|
117
|
-
- commands
|
118
|
-
- application_command.rb
|
119
|
-
- create_book.rb
|
120
|
-
- update_book.rb
|
121
|
-
- contexts
|
122
|
-
- application_context.rb
|
123
|
-
- book_context.rb
|
124
|
-
- serializers
|
125
|
-
- book_serializer.rb
|
126
39
|
```
|
127
|
-
|
128
|
-
### Request Context: Current User, Resource, and REST
|
129
|
-
|
130
|
-
From the programmer's perspective, a typical resource is made up of several request patterns:
|
131
|
-
|
132
|
-
- Filter Context (index, show)
|
133
|
-
- Commands (aka mutations. create, update, destroy)
|
134
|
-
- Serializers (aka presenters, views)
|
135
|
-
|
136
|
-
Each of these objects can be configured to behave in certain ways that may be dependent on the user or role making the request to interact with them.
|
137
|
-
|
138
|
-
Most API requests can be thought of in the following ways:
|
139
|
-
|
140
|
-
```ruby
|
141
|
-
# A Typical read request ( query / filter or detail view )
|
142
|
-
|
143
|
-
response = present( this_resource ) # resource -> filter context
|
144
|
-
.to(this_user) # filter context: relevant for this user
|
145
|
-
.in(this_presentation) # serializer: different slices / renderings
|
146
|
-
|
147
|
-
response.cache_key # russian doll style / max-updated-at friendly
|
148
|
-
response.etag # http client conditional get
|
149
|
-
```
|
150
|
-
|
151
|
-
The filter context and serializer classes make this easy. They also
|
152
|
-
make writing -- or rather, generating -- documentation and tests very
|
153
|
-
easy as well.
|
154
|
-
|
155
|
-
```ruby
|
156
|
-
# Typical mutation request ( create, update, delete )
|
157
|
-
|
158
|
-
outcome = run(this_command)
|
159
|
-
.as(this_user)
|
160
|
-
.against(this_set_of_one_or_more_records)
|
161
|
-
.with(these_arguments)
|
162
|
-
|
163
|
-
outcome.success?
|
164
|
-
|
165
|
-
outcome.error_messages
|
166
|
-
|
167
|
-
outcome.result
|
168
|
-
```
|
169
|
-
|
170
|
-
The command class determines the specifics of the above style of
|
171
|
-
request.
|
172
|
-
|
173
|
-
### The Filter Context
|
174
|
-
|
175
|
-
The filter context system is used to standardize the way we write
|
176
|
-
typical index and show actions in a typical Rails app. A user is
|
177
|
-
requesting to view a set of records, or an individual records.
|
178
|
-
|
179
|
-
Given a user making a request to view a specific resource, we arrive at
|
180
|
-
the 'filter context'. The filter context is responsible for 'scoping' a
|
181
|
-
resource to the set of records that user is permitted to view.
|
182
|
-
|
183
|
-
Based on the combination of parameters used to build that filter, we
|
184
|
-
compute a cache key that simplifies the process of server caching and
|
185
|
-
http client caching at the same time.
|
186
|
-
|
187
|
-
The filter context itself and the available parameters and their allowed
|
188
|
-
values are specified by the DSL, which simplifies the process of writing
|
189
|
-
complex queries and also provides configuration meta-data that aids in
|
190
|
-
the process of developing client user interfaces, API documentation, and
|
191
|
-
test code.
|
192
|
-
|
193
|
-
### Commands
|
194
|
-
|
195
|
-
The command class allows you to declare the available parameters, the
|
196
|
-
required values, their data types, etc. It also allows you to declare
|
197
|
-
which users can run the command, and further restrict the parameters
|
198
|
-
allowed and the values they accept.
|
199
|
-
|
200
|
-
### Serializers
|
201
|
-
|
202
|
-
- ActiveModel Serializers
|
203
|
-
- Documentation DSL
|
204
|
-
- Metadata for inspection + documentation generation
|
205
|
-
|
206
|
-
## API Documentation & Integration Tests
|
207
|
-
|
208
|
-
- rspec_api_documentation gem
|
209
|
-
- plan: take advantage of metadata defined above to auto-generate
|
210
|
-
documentation with the ability to pass expectation blocks as pass /
|
211
|
-
fail indicators
|
data/datapimp.gemspec
CHANGED
@@ -18,11 +18,11 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
19
19
|
|
20
20
|
spec.add_dependency 'pry'
|
21
|
-
spec.add_dependency 'hashie'
|
21
|
+
spec.add_dependency 'hashie'
|
22
22
|
spec.add_dependency 'colored'
|
23
23
|
spec.add_dependency 'commander'
|
24
24
|
spec.add_dependency 'fog-aws'
|
25
|
-
spec.add_dependency 'dropbox-api'
|
25
|
+
spec.add_dependency 'dropbox-api', '> 0.4.3'
|
26
26
|
spec.add_dependency 'google_drive'
|
27
27
|
spec.add_dependency 'rack-contrib'
|
28
28
|
spec.add_dependency 'uri_template'
|
data/lib/datapimp/cli/config.rb
CHANGED
@@ -1,3 +1,20 @@
|
|
1
|
+
# CLI command: prints every key/value pair of the current Datapimp
# configuration (Datapimp.config.current), one pair per line.
#
# With --env each pair is printed as KEY='value' (key upcased); otherwise
# it is printed as "key: value".
command 'config' do |c|
  c.syntax = "datapimp config [OPTIONS]"
  c.description = "Shows the configuration options being used"

  c.option '--env', "Output compatible with .env files"

  c.action do |args, options|
    Datapimp.config.current.to_hash.each do |key, value|
      if options.env
        # No whitespace after `=`: a POSIX shell reads `KEY= 'v'` as an empty
        # assignment followed by a command, and env-file loaders that do not
        # trim would keep the leading space as part of the value.
        puts "#{ key.to_s.upcase }='#{ value }'"
      else
        puts "#{key}: #{value}"
      end
    end
  end
end
|
17
|
+
|
1
18
|
command 'config set' do |c|
|
2
19
|
c.syntax = 'datapimp config set KEY=VALUE KEY=VALUE [options]'
|
3
20
|
c.description = 'manipulate configuration settings'
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# CLI command: writes one line per spreadsheet returned by
# Datapimp::Sync.google.spreadsheets, in the form "<key>\t\t<title>".
command "list spreadsheets" do |c|
  c.syntax = "datapimp list spreadsheets"
  c.description = "list the spreadsheets which can be used as datasources"

  c.option '--type TYPE', String, "What type of source data is this? #{ Datapimp::Sync.data_source_types.join(", ") }"

  Datapimp::Cli.accepts_keys_for(c, :google, :dropbox)

  c.action do |args, options|
    # NOTE(review): --type is accepted above but not consulted here; only
    # the google client is queried.
    available = Datapimp::Sync.google.spreadsheets

    available.each { |entry| puts "#{ entry.key }\t\t#{ entry.title }" }
  end
end
|
15
|
+
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# Helpers backing the `datapimp sync data` CLI command.
module Datapimp::DataSync
  # Fetches a Google spreadsheet and emits its serialized form (the result
  # of GoogleSpreadsheet#to_s).
  #
  # options - object responding to #output; when #output is truthy the
  #           serialized data is written to that path, otherwise it is
  #           printed to stdout.
  # args    - the spreadsheet key followed by an optional display name
  #           (defaults to "Spreadsheet"). May be given either as separate
  #           arguments or as one array, since the CLI action forwards its
  #           whole `args` array as a single argument.
  #
  # Raises RuntimeError when the google client yields no spreadsheets
  # collection or when no key is supplied.
  def self.sync_google_spreadsheet(options, *args)
    require 'google_drive' unless defined?(::GoogleDrive)

    raise 'Must setup google client' unless Datapimp::Sync.google.spreadsheets

    # Flatten so that both call styles work: (options, key, name) and
    # (options, [key, name]). Without this, the array form made `key` an Array.
    key, name = args.flatten
    name ||= "Spreadsheet"

    raise 'Must supply a spreadsheet key' unless key

    spreadsheet = Datapimp::Sources::GoogleSpreadsheet.new(name, key: key)

    # Serialize before opening the destination so a failed fetch does not
    # leave a truncated output file behind.
    serialized = spreadsheet.to_s

    if options.output
      Pathname(options.output).open("w+") { |fh| fh.write(serialized) }
    else
      puts serialized
    end
  end
end
|
data/lib/datapimp/cli/sync.rb
CHANGED
@@ -3,11 +3,23 @@ command "sync folder" do |c|
|
|
3
3
|
c.syntax = "datapimp sync folder LOCAL_PATH REMOTE_PATH [OPTIONS]"
|
4
4
|
|
5
5
|
c.option '--type TYPE', String, "Which service is hosting the folder"
|
6
|
+
c.option '--action ACTION', String, "Which sync action to run? push, pull"
|
6
7
|
|
7
8
|
Datapimp::Cli.accepts_keys_for(c, :amazon, :google, :github, :dropbox)
|
8
9
|
|
9
10
|
c.action do |args, options|
|
11
|
+
options.default(action:"pull", type: "dropbox")
|
10
12
|
|
13
|
+
local, remote = args
|
14
|
+
|
15
|
+
folder = case
|
16
|
+
when options.type == "dropbox"
|
17
|
+
Datapimp::Sync::DropboxFolder.new(local: local, remote: remote)
|
18
|
+
when options.type == "google"
|
19
|
+
Datapimp::Sync::GoogleDriveFolder.new(local: local, remote: remote)
|
20
|
+
end
|
21
|
+
|
22
|
+
folder.run(options.action)
|
11
23
|
end
|
12
24
|
end
|
13
25
|
|
@@ -20,10 +32,13 @@ command "sync data" do |c|
|
|
20
32
|
c.option '--columns NAMES', Array, "Extract only these columns"
|
21
33
|
|
22
34
|
c.example "Syncing an excel file from dropbox ", "datapimp sync data --type dropbox --columns name,description --dropbox-app-key ABC --dropbox-app-secret DEF --dropbox-client-token HIJ --dropbox-client-secret JKL spreadsheets/test.xslx"
|
35
|
+
c.example "Syncing a google spreadsheet", "datapimp sync data --type google-spreadsheet WHATEVER_THE_KEY_IS"
|
23
36
|
|
24
37
|
Datapimp::Cli.accepts_keys_for(c, :google, :github, :dropbox)
|
25
38
|
|
26
39
|
c.action do |args, options|
|
27
|
-
|
40
|
+
if options.type == "google-spreadsheet"
|
41
|
+
Datapimp::DataSync.sync_google_spreadsheet(options, args)
|
42
|
+
end
|
28
43
|
end
|
29
44
|
end
|
@@ -72,27 +72,6 @@ module Datapimp
|
|
72
72
|
end
|
73
73
|
end
|
74
74
|
|
75
|
-
def sync(local_path, remote_path, options={})
|
76
|
-
sync_folders(local_path, remote_path, options)
|
77
|
-
end
|
78
|
-
|
79
|
-
# Instructs Datapimp to create a syncable folder
|
80
|
-
# between the local and remote path, and optionally
|
81
|
-
# modifies the middleman config for the site
|
82
|
-
def sync_folders(local_path, remote_path, options={})
|
83
|
-
app = options[:app]
|
84
|
-
|
85
|
-
folder = Datapimp::Sync::Folder.new(local_path: local_path,
|
86
|
-
remote_path: remote_path,
|
87
|
-
app: app).synced
|
88
|
-
|
89
|
-
if !!options[:append_config] == true
|
90
|
-
Datapimp.append_config(folder.config_line)
|
91
|
-
end
|
92
|
-
|
93
|
-
folder
|
94
|
-
end
|
95
|
-
|
96
75
|
def method_missing meth, *args, &block
|
97
76
|
if api.respond_to?(meth)
|
98
77
|
return api.send(meth, *args, &block)
|
@@ -0,0 +1,212 @@
|
|
1
|
+
module Datapimp::Sources
  # A data source backed by a Google Drive spreadsheet. Each worksheet is
  # turned into an array of row hashes keyed by the parameterized header
  # cells of its first row.
  class GoogleSpreadsheet < Datapimp::Sources::Base
    requires :key

    attr_accessor :key,
                  :session,
                  :name

    # name    - either a display name or a GoogleDrive::Spreadsheet; in the
    #           latter case the title and key are taken from the object.
    # options - :key     spreadsheet key (required unless a spreadsheet
    #                    object was passed)
    #           :session API session; defaults to Datapimp::Sync.google.api
    def initialize name, options={}
      @options = options

      if name.is_a?(GoogleDrive::Spreadsheet)
        @spreadsheet = name
        @name = @spreadsheet.title
        @key = @spreadsheet.key
      end

      @key ||= options[:key]
      @session ||= options.fetch(:session) { Datapimp::Sync.google.api }

      # Base#initialize fills in name, format, path and slug_column (all via
      # ||= or from options, so the values set above win) and then validates
      # the required options. Without it, name and path stayed nil and
      # Base#file / Base#path_to_file raised.
      super(name, options)
    end

    # Uploads the CSV file at `path` as a new spreadsheet named `title` and
    # returns the wrapped result of looking that title up again.
    #
    # Raises RuntimeError if find_by_title already matches `title`.
    def self.create_from_file(path, title)
      if find_by_title(title)
        raise 'Spreadsheet with this title already exists'
      end

      # There is no class-level `session`; use the same API handle that
      # create_from_data uses.
      Datapimp::Sync.google.api.upload_from_file(path, title, :content_type => "text/csv")

      # Drop the memoized listing so the lookup below can see the upload.
      @session_spreadsheets = nil

      find_by_title(title)
    end

    # Looks a spreadsheet up by key first, then by title.
    def self.[](key_or_title)
      find_by_key(key_or_title) || find_by_title(key_or_title)
    end

    # Returns a wrapped spreadsheet whose key equals `key`, or nil.
    def self.find_by_key(key)
      sheet = session_spreadsheets.detect do |spreadsheet|
        spreadsheet.key == key
      end

      # NOTE(review): this passes Datapimp::Sync.google.session while
      # #initialize defaults to Datapimp::Sync.google.api -- confirm both
      # refer to the same object.
      sheet && new(sheet, session: Datapimp::Sync.google.session)
    end

    # Returns the first wrapped spreadsheet whose title matches `title`, or
    # nil. String#match is used, so `title` is treated as a pattern and a
    # partial match counts.
    def self.find_by_title title
      sheet = session_spreadsheets.detect do |spreadsheet|
        spreadsheet.title.match(title)
      end

      sheet && new(sheet, session: Datapimp::Sync.google.session)
    end

    # Memoized list of spreadsheets visible to the google API handle.
    def self.session_spreadsheets
      @session_spreadsheets ||= Datapimp::Sync.google.api.spreadsheets
    end

    # Builds a CSV from `data` and uploads it as a new spreadsheet.
    #
    # data    - enumerable of hash-like rows responding to #stringify_keys
    # options - :headers column names, written as the first CSV row and used
    #                    to pick the values of each data row
    #           :title   title for the uploaded spreadsheet
    #
    # Returns a GoogleSpreadsheet for the uploaded document.
    def self.create_from_data(data, options={})
      require 'csv'
      require 'tempfile'

      headers = Array(options[:headers]).map(&:to_s)

      # A real temp file instead of a fixed "tmp-csv.csv" in the working
      # directory: no clobbering between runs and it is removed afterwards.
      Tempfile.create(['datapimp', '.csv']) do |tmp|
        tmp.close

        CSV.open(tmp.path, "wb") do |csv|
          csv << headers

          data.each do |row|
            record = row.stringify_keys
            csv << headers.map { |header| record[header] }
          end
        end

        spreadsheet = Datapimp::Sync.google.api.upload_from_file(tmp.path, options[:title], :content_type => "text/csv")

        # The memoized listing no longer reflects the server.
        @session_spreadsheets = nil

        new(spreadsheet.title, key: spreadsheet.key)
      end
    end

    # The name given at construction, falling back to the remote title.
    def title
      @name ||= spreadsheet.try(:title)
    end

    def edit_url
      spreadsheet.human_url
    end

    # Pushes a "writer" ACL entry for each email. Returns the flattened
    # list of emails.
    def share_write_access_with *emails
      share_access_with("writer", emails)
    end

    # Pushes a "reader" ACL entry for each email. Returns the flattened
    # list of emails.
    def share_read_access_with *emails
      share_access_with("reader", emails)
    end

    # Resolves `collection_title` (a GoogleDrive::Collection or a title
    # string) against the session's collections and raises if none matches.
    #
    # NOTE(review): the resolved collection is never used -- the spreadsheet
    # is not actually added to it and the method returns nil. Looks
    # unfinished; confirm intended behavior before relying on it.
    def add_to_collection collection_title
      collection = if collection_title.is_a?(GoogleDrive::Collection)
        collection_title
      else
        session.collections.find do |c|
          c.title == collection_title
        end
      end

      if !collection
        collection_names = session.collections.map(&:title)
        raise 'Could not find collection in Google drive. Maybe you mean: ' + collection_names.join(', ')
      end
    end

    def spreadsheet_key
      key
    end

    # Stale when the local copy is older than max_age, or when the server
    # copy was updated after the last refresh.
    def stale?
      (!need_to_refresh? && (age > max_age)) || fresh_on_server?
    end

    def fresh_on_server?
      refreshed_at.to_i > 0 && (last_updated_at > refreshed_at)
    end

    # Epoch seconds of the spreadsheet's `updated` feed element. Falls back
    # to the current time when the element cannot be read, is blank, or
    # cannot be parsed.
    def last_updated_at
      value = begin
        spreadsheet.document_feed_entry_internal.css('updated').try(:text)
      rescue StandardError
        nil
      end

      # A blank string is truthy, and DateTime.parse("") raises.
      return Time.now.to_i if value.nil? || value.empty?

      DateTime.parse(value).to_i
    rescue ArgumentError
      Time.now.to_i
    end

    # Loads every worksheet into `raw`.
    def fetch
      # Base#need_to_refresh? checks @fetched; leaving it unset made every
      # call to #data hit the API again.
      @fetched = true
      self.raw = process_worksheets
    end

    # With a single worksheet the rows are returned as a flat array;
    # otherwise the worksheet-keyed structure is kept.
    def preprocess
      single? ? raw.values.flatten : raw
    end

    protected

    # Shared implementation of the share_*_access_with methods.
    def share_access_with(role, emails)
      acl = spreadsheet.acl

      Array(emails).flatten.each do |email|
        acl.push scope_type: "user",
                 with_key: false,
                 role: role,
                 scope: email
      end
    end

    # Builds { worksheet_key => [row_hash, ...] }. The first row of each
    # worksheet supplies the column names; every later row becomes a hash of
    # stripped cell strings plus "_id" (1 for the first data row).
    def process_worksheets
      worksheets.inject({}.to_mash) do |memo, (k, ws)|
        all_rows = ws.rows
        column_names = Array(all_rows[0]).map {|cell| "#{ cell }".parameterize.underscore }

        # drop(1) yields [] for an empty worksheet, where slice(1, n)
        # returned nil and the following map raised.
        memo[k] = all_rows.drop(1).each_with_index.map do |row, offset|
          row_index = offset + 1
          record = {}

          column_names.each_with_index do |field, col_index|
            record[field] = "#{ row[col_index] }".strip
            record["_id"] = row_index
          end

          record
        end

        memo
      end
    end

    def single?
      worksheets.length == 1
    end

    # Returns the worksheet at an integer position, or the one stored under
    # the given title key.
    def header_rows_for_worksheet key
      # Integer instead of Fixnum: Fixnum was removed in Ruby 3.2.
      if key.is_a?(Integer)
        _worksheets[key]
      else
        worksheets.fetch(key)
      end
    end

    # Worksheets keyed by their normalized title (stripped, downcased,
    # underscored, whitespace runs replaced by "_").
    def worksheets
      @worksheets ||= _worksheets.inject({}.to_mash) do |memo,ws|
        key = ws.title.strip.downcase.underscore.gsub(/\s+/,'_')
        memo[key] = ws
        memo
      end
    end

    def _worksheets
      @_worksheets ||= spreadsheet.worksheets
    end

    def spreadsheet
      @spreadsheet ||= session.spreadsheet_by_key(spreadsheet_key)
    end
  end
end
|
@@ -0,0 +1,198 @@
|
|
1
|
+
# The `Datapimp::Sources` module houses the various
|
2
|
+
# types of remote data stores we are reading and converting into
|
3
|
+
# a JSON array of objects that gets cached on our filesystem.
|
4
|
+
#
|
5
|
+
module Datapimp
  module Sources
    # Common behavior for data sources: fetch raw data, process it into
    # records, and cache the result as a JSON file on disk.
    #
    # Subclasses typically override #fetch (populate `raw`) and optionally
    # #preprocess (turn `raw` into the initial `processed` value).
    class Base
      attr_reader :options, :name
      attr_accessor :raw, :processed, :format, :scopes, :slug_column, :refreshed_at, :path

      class << self
        attr_accessor :required_options
      end

      # Declares the option keys every instance of this class must supply.
      def self.requires *args
        self.required_options = args
      end

      # name    - identifies the source; also used to derive the cache file name
      # options - :format      defaults to :json
      #           :path        directory of the cache file, defaults to Dir.pwd
      #           :slug_column defaults to :_id
      #
      # Raises RuntimeError when a required option is missing.
      def initialize(name, options={})
        @name ||= name
        @options ||= options
        @format ||= options.fetch(:format, :json)
        @path ||= options.fetch(:path) { Pathname(Dir.pwd()) }

        @slug_column = options.fetch(:slug_column, :_id)

        ensure_valid_options!
      end

      # The processed data serialized via #to_json.
      def to_s
        data.to_json
      end

      # defines a scope for the records in this data source
      # a scope is a named filter, implemented in the form of a block
      # which is passed each record. if the block returns true, it returns
      # the record:
      #
      # Example:
      #
      #   data_source(:galleries) do
      #     scope :active, -> {|record| record.state == "active" }
      #   end
      #
      # Note that the callable is taken as the last positional argument,
      # not as a Ruby block.
      def scope(*args, block)
        name = args.first
        (self.scopes ||= {})[name.to_sym] = block
      end

      def has_scope?(scope_name)
        scope_name && (self.scopes ||= {}).key?(scope_name.to_sym)
      end

      # compute properties takes the raw data of each record
      # and sets additional properties on the records which may
      # not be persisted in the data source
      #
      # Records responding to the slug column get a parameterized `slug`.
      # Each registered processor is then called as
      # processor.call(record, index, previous:, set:) and its return values
      # replace `processed`.
      def compute_properties
        # Nothing to compute before data has been loaded.
        return unless self.processed

        self.processed.map! do |row|
          if slug_column && row.respond_to?(slug_column)
            row.slug = row.send(slug_column).to_s.parameterize
          end

          row
        end

        processors.each do |processor|
          original = self.processed.dup
          modified = []

          original.each_with_index do |record, index|
            # The first record has no predecessor; original[-1] would
            # wrongly hand it the last record.
            previous = index.zero? ? nil : original[index - 1]
            modified.push(processor.call(record, index, previous: previous, set: original))
          end

          self.processed = modified
        end
      end

      # Returns the list of processors, registering the given block first
      # when one is passed.
      def processors &block
        @processors ||= []
        @processors << block if block_given?
        @processors
      end

      # makes sure that the required options for this data source
      # are passed for any instance of the data source
      #
      # A required key counts as present when it is in `options` or when the
      # instance responds to it with a non-nil value.
      def ensure_valid_options!
        missing_options = (Array(self.class.required_options) - options.keys.map(&:to_sym))

        missing_options.reject! do |key|
          respond_to?(key) && !send(key).nil?
        end

        if missing_options.length > 0
          raise 'Error: failure to supply the following options: ' + missing_options.map(&:to_s).join(",")
        end
      end

      def select(&block)
        data.select(&block)
      end

      # Fetches and processes the data, stamps refreshed_at, returns self.
      def refresh
        fetch
        process
        self.refreshed_at = Time.now.to_i
        self
      end

      def refresh_if_stale?
        refresh! if stale?
      end

      # A data source is stale if it has been populated
      # and the age is greater than the max age we allow.
      def stale?
        !need_to_refresh? && (age > max_age)
      end

      def fresh_on_server?
        need_to_refresh?
      end

      # Maximum age in seconds: ENV['MAX_DATA_SOURCE_AGE'] when set,
      # otherwise 120.
      def max_age
        max = ENV['MAX_DATA_SOURCE_AGE']
        (max && max.to_i) || 120
      end

      # how long since this data source has been refreshed?
      def age
        Time.now.to_i - refreshed_at.to_i
      end

      # The processed records, loading them first if that has not happened.
      def data
        refresh if need_to_refresh?
        processed
      end

      # Like #refresh, and additionally writes the result to disk.
      def refresh!
        refresh
        save_to_disk
      end

      def need_to_refresh?
        !(@fetched && @_processed)
      end

      def fetch
        @fetched = true
        self.raw = []
      end

      def preprocess
        self.raw.dup
      end

      def process
        @_processed = true
        self.processed = preprocess
        # set_id
        compute_properties
        self.processed
      end

      # Epoch seconds of the last refresh; when unknown, the mtime of the
      # cache file if it exists, otherwise nil.
      def refreshed_at
        return @refreshed_at if @refreshed_at.to_i > 0

        # path_to_file wraps `path` in a Pathname, so a String :path works
        # too (String has no #join).
        if path_to_file.exist?
          @refreshed_at = File.mtime(path_to_file).to_i
        end
      end

      # Writes #to_s to the cache file, creating its directory if needed.
      def save_to_disk
        unless path_to_file.dirname.exist?
          # mkdir_p so that missing parent directories are created as well.
          FileUtils.mkdir_p(path_to_file.dirname)
        end

        path_to_file.open('w+') {|fh| fh.write(to_s) }
      end

      def persisted?
        path_to_file && path_to_file.exist?
      end

      # Cache file name derived from `name`: parameterized when the name
      # supports it (otherwise its plain string form), dashes turned into
      # underscores, and ".json" appended unless already present.
      def file
        @file ||= begin
          base = name.respond_to?(:parameterize) ? name.parameterize : name.to_s
          base = base.tr("-", "_")
          base =~ /\.json\z/i ? base : "#{base}.json"
        end
      end

      def path_to_file
        Pathname(path).join("#{ file }")
      end
    end
  end
end
|
197
|
+
|
198
|
+
Dir[Datapimp.lib.join("datapimp/sources/**/*.rb")].each {|f| require(f) }
|
@@ -1,10 +1,37 @@
|
|
1
1
|
module Datapimp
|
2
|
-
class Sync::DropboxFolder <
|
3
|
-
|
2
|
+
class Sync::DropboxFolder < Hashie::Mash
|
3
|
+
def dropbox
|
4
|
+
@dropbox ||= Datapimp::Sync.dropbox
|
5
|
+
end
|
6
|
+
|
7
|
+
def delta
|
8
|
+
@delta ||= dropbox.delta(cursor, remote_path)
|
9
|
+
end
|
10
|
+
|
11
|
+
def local_path
|
12
|
+
Pathname(local)
|
13
|
+
end
|
14
|
+
|
15
|
+
def remote_path
|
16
|
+
Datapimp::Sync.dropbox.ls(remote)
|
17
|
+
end
|
18
|
+
|
19
|
+
def cursor
|
20
|
+
cursor_path.exist? && cursor_path.read
|
21
|
+
end
|
22
|
+
|
23
|
+
def cursor_path
|
24
|
+
local_path.join('.dropbox-cursor')
|
25
|
+
end
|
26
|
+
|
27
|
+
def run(action)
|
28
|
+
action = action.to_sym
|
29
|
+
|
30
|
+
if action == :push
|
4
31
|
|
32
|
+
elsif action == :pull
|
5
33
|
|
6
|
-
|
7
|
-
Datapimp.dropbox(token: client_token, secret: client_secret)
|
34
|
+
end
|
8
35
|
end
|
9
36
|
end
|
10
37
|
end
|
data/lib/datapimp/version.rb
CHANGED
data/lib/datapimp.rb
CHANGED
@@ -3,6 +3,7 @@ require 'set'
|
|
3
3
|
require 'pathname'
|
4
4
|
require 'hashie'
|
5
5
|
require 'datapimp/core_ext'
|
6
|
+
require 'active_support'
|
6
7
|
|
7
8
|
module Datapimp
|
8
9
|
def self.config
|
@@ -29,5 +30,7 @@ end
|
|
29
30
|
|
30
31
|
require 'datapimp/version'
|
31
32
|
require 'datapimp/configuration'
|
32
|
-
require 'datapimp/
|
33
|
+
require 'datapimp/sources'
|
33
34
|
require 'datapimp/sync'
|
35
|
+
require 'datapimp/sync/dropbox_folder'
|
36
|
+
require 'datapimp/sync/google_drive_folder'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datapimp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonathan Soeder
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0
|
33
|
+
version: '0'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 0
|
40
|
+
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: colored
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -84,16 +84,16 @@ dependencies:
|
|
84
84
|
name: dropbox-api
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
|
-
- - "
|
87
|
+
- - ">"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version:
|
89
|
+
version: 0.4.3
|
90
90
|
type: :runtime
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
|
-
- - "
|
94
|
+
- - ">"
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version:
|
96
|
+
version: 0.4.3
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: google_drive
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -296,7 +296,9 @@ files:
|
|
296
296
|
- lib/datapimp/cli.rb
|
297
297
|
- lib/datapimp/cli/01_extensions.rb
|
298
298
|
- lib/datapimp/cli/config.rb
|
299
|
+
- lib/datapimp/cli/list.rb
|
299
300
|
- lib/datapimp/cli/setup.rb
|
301
|
+
- lib/datapimp/cli/support/data_sync.rb
|
300
302
|
- lib/datapimp/cli/sync.rb
|
301
303
|
- lib/datapimp/clients/amazon.rb
|
302
304
|
- lib/datapimp/clients/dropbox.rb
|
@@ -304,13 +306,13 @@ files:
|
|
304
306
|
- lib/datapimp/clients/google.rb
|
305
307
|
- lib/datapimp/configuration.rb
|
306
308
|
- lib/datapimp/core_ext.rb
|
307
|
-
- lib/datapimp/
|
308
|
-
- lib/datapimp/
|
309
|
-
- lib/datapimp/
|
310
|
-
- lib/datapimp/
|
311
|
-
- lib/datapimp/
|
312
|
-
- lib/datapimp/
|
313
|
-
- lib/datapimp/
|
309
|
+
- lib/datapimp/sources.rb
|
310
|
+
- lib/datapimp/sources/dropbox.rb
|
311
|
+
- lib/datapimp/sources/excel.rb
|
312
|
+
- lib/datapimp/sources/github.rb
|
313
|
+
- lib/datapimp/sources/google.rb
|
314
|
+
- lib/datapimp/sources/json.rb
|
315
|
+
- lib/datapimp/sources/nokogiri.rb
|
314
316
|
- lib/datapimp/sync.rb
|
315
317
|
- lib/datapimp/sync/dropbox_delta.rb
|
316
318
|
- lib/datapimp/sync/dropbox_folder.rb
|
@@ -1,10 +0,0 @@
|
|
1
|
-
# The `Datapimp::Dataources` module houses the various
|
2
|
-
# types of remote data stores we are reading and converting into
|
3
|
-
# a JSON array of objects that gets cached on our filesystem.
|
4
|
-
module Datapimp
|
5
|
-
module DataSources
|
6
|
-
|
7
|
-
end
|
8
|
-
end
|
9
|
-
|
10
|
-
Dir[Datapimp.lib.join("datapimp/data_sources/**/*.rb")].each {|f| require(f) }
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|