datapimp 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +26 -198
- data/datapimp.gemspec +2 -2
- data/lib/datapimp/cli/config.rb +17 -0
- data/lib/datapimp/cli/list.rb +15 -0
- data/lib/datapimp/cli/support/data_sync.rb +22 -0
- data/lib/datapimp/cli/sync.rb +16 -1
- data/lib/datapimp/clients/dropbox.rb +0 -21
- data/lib/datapimp/sources/google.rb +212 -0
- data/lib/datapimp/sources.rb +198 -0
- data/lib/datapimp/sync/dropbox_folder.rb +31 -4
- data/lib/datapimp/version.rb +1 -1
- data/lib/datapimp.rb +4 -1
- metadata +16 -14
- data/lib/datapimp/data_sources/google.rb +0 -5
- data/lib/datapimp/data_sources.rb +0 -10
- /data/lib/datapimp/{data_sources → sources}/dropbox.rb +0 -0
- /data/lib/datapimp/{data_sources → sources}/excel.rb +0 -0
- /data/lib/datapimp/{data_sources → sources}/github.rb +0 -0
- /data/lib/datapimp/{data_sources → sources}/json.rb +0 -0
- /data/lib/datapimp/{data_sources → sources}/nokogiri.rb +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a1b62d4316928f5d3ffa350d05cb579649fa1a67
|
4
|
+
data.tar.gz: d77cae1fa5c88ac3dedd9711dd8200cc98de1a6f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f373b48034f1ca5382f7e19f81cb86d623fa143ec22d72cc5f924115dae2b137d5d4103771fb471593bdc794a9498f34cba989e5d547a3903b87cfdd4ed24c08
|
7
|
+
data.tar.gz: 3e1df1aef41dfa912ebadde158c70fc8991869527d2b5b6f019739096110aa918f603e01c47e7029e6bdffdec97604b8fd022da7f3e2d5a4b78fbe1937651d6f
|
data/README.md
CHANGED
@@ -1,211 +1,39 @@
|
|
1
|
-
###
|
1
|
+
### Tools for working with common data sources
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
A collection of CLI utilities for pulling both files and structured data
|
4
|
+
from file sharing and collaboration services like Google Drive and
|
5
|
+
Dropbox.
|
5
6
|
|
6
|
-
|
7
|
+
You can use this to pull down google spreadsheets and convert them to
|
8
|
+
JSON structures, or to convert an excel spreadsheet on Dropbox to the
|
9
|
+
same.
|
7
10
|
|
8
|
-
|
11
|
+
You can use this to sync the contents of a local folder on your system
|
12
|
+
with the remote contents of a file share on Dropbox or Google Drive.
|
9
13
|
|
10
|
-
|
14
|
+
And much more.
|
11
15
|
|
12
|
-
###
|
16
|
+
### Getting Started
|
13
17
|
|
14
|
-
```
|
15
|
-
|
16
|
-
|
17
|
-
api :my_app => "My Application" do
|
18
|
-
version :v1
|
19
|
-
|
20
|
-
desc "Public users include anyone with access to the URL"
|
21
|
-
policy :public_users do
|
22
|
-
allow :books, :commands => false, :queries => true
|
23
|
-
end
|
24
|
-
|
25
|
-
desc "Authenticated users register and are given an auth token"
|
26
|
-
policy :logged_in_users do
|
27
|
-
authenticate_with :header => 'X-AUTH-TOKEN', :param => :auth_token
|
28
|
-
allow :books, :commands => true, :queries => true
|
29
|
-
end
|
30
|
-
|
31
|
-
desc "Admin users have the admin flag set to true"
|
32
|
-
policy :admin_users do
|
33
|
-
extends :logged_in_users
|
34
|
-
test :admin?
|
35
|
-
end
|
36
|
-
end
|
37
|
-
```
|
38
|
-
|
39
|
-
An API can be inspected:
|
40
|
-
|
41
|
-
```ruby
|
42
|
-
api("My Application").authentication_header #=> "X-AUTH-TOKEN"
|
43
|
-
api("My Application").policies #=> [:public_users, :logged_in_users, :admin_users]
|
44
|
-
api("My Application").policy(:admin_users).resource(:books).allowed_commands #=> [:create, :update, :delete]
|
45
|
-
```
|
46
|
-
|
47
|
-
An API is made up of resources:
|
48
|
-
|
49
|
-
```ruby
|
50
|
-
resource "Books" do
|
51
|
-
serializer do
|
52
|
-
desc "A unique id for the book", :type => :integer
|
53
|
-
attribute :id
|
54
|
-
|
55
|
-
desc "The title of the book", :type => :string
|
56
|
-
attribute :title
|
57
|
-
|
58
|
-
desc "The year the book was published", :type => :integer
|
59
|
-
attribute :year
|
60
|
-
|
61
|
-
desc "A reference to the author", :type => "Author"
|
62
|
-
has_one :author
|
63
|
-
end
|
64
|
-
|
65
|
-
command :update, "Update a book's attributes" do
|
66
|
-
# Will ensure the command is run with
|
67
|
-
# Book.accessible_to(current_user).find(id).
|
68
|
-
scope :accessible_to
|
69
|
-
|
70
|
-
params do
|
71
|
-
duck :id, :method => :to_s
|
72
|
-
|
73
|
-
optional do
|
74
|
-
string :title
|
75
|
-
end
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
query do
|
80
|
-
start_from :scope => :accessible_to
|
81
|
-
|
82
|
-
params do
|
83
|
-
desc "The year the book was published (example: YYYY)"
|
84
|
-
integer :year_published
|
85
|
-
end
|
86
|
-
|
87
|
-
role :admin do
|
88
|
-
start_from :scope => :all
|
89
|
-
end
|
90
|
-
end
|
91
|
-
end
|
18
|
+
```bash
|
19
|
+
gem install datapimp
|
20
|
+
datapimp help
|
92
21
|
```
|
93
22
|
|
94
|
-
|
95
|
-
|
96
|
-
```ruby
|
97
|
-
meta_data = api("My Application").resource("Books").meta_data
|
23
|
+
#### Available Commands
|
98
24
|
|
99
|
-
meta_data.attributes # {:id => "The id of the book", :year_published => "The year it was published"}
|
100
|
-
meta_data.commands => [:update]
|
101
|
-
meta_data.command(:update).arguments #=> [:id, :year, :title]
|
102
|
-
meta_data.command(:update).optional_arguments #=> [:year, :title]
|
103
25
|
```
|
26
|
+
COMMANDS:
|
104
27
|
|
105
|
-
|
106
|
-
|
107
|
-
|
28
|
+
config Shows the configuration options being used
|
29
|
+
config set manipulate configuration settings
|
30
|
+
help Display global or [command] help documentation
|
31
|
+
list spreadsheets list the spreadsheets which can be used as datasources
|
32
|
+
setup amazon setup integration with amazon
|
33
|
+
setup dropbox setup integration with dropbox
|
34
|
+
setup github setup integration with github
|
35
|
+
setup google setup integration with google drive
|
36
|
+
sync data Synchronize the contents of a local data store with its remote source
|
37
|
+
sync folder Synchronize the contents of a local folder with a file sharing service
|
108
38
|
|
109
|
-
### Customizing the elements
|
110
|
-
|
111
|
-
How is each of these behaviors stored in code? In a way that will be
|
112
|
-
very familiar to Rails developers, following common naming conventions
|
113
|
-
and file organization patterns.
|
114
|
-
|
115
|
-
```
|
116
|
-
- app
|
117
|
-
- commands
|
118
|
-
- application_command.rb
|
119
|
-
- create_book.rb
|
120
|
-
- update_book.rb
|
121
|
-
- contexts
|
122
|
-
- application_context.rb
|
123
|
-
- book_context.rb
|
124
|
-
- serializers
|
125
|
-
- book_serializer.rb
|
126
39
|
```
|
127
|
-
|
128
|
-
### Request Context: Current User, Resource, and REST
|
129
|
-
|
130
|
-
From the programmer's perspective, a typical resource is made up of several request patterns:
|
131
|
-
|
132
|
-
- Filter Context (index, show)
|
133
|
-
- Commands (aka mutations. create, update, destroy)
|
134
|
-
- Serializers (aka presenters, views)
|
135
|
-
|
136
|
-
Each of these objects can be configured to behave in certain ways that may be dependent on the user or role making the request to interact with them.
|
137
|
-
|
138
|
-
Most API requests can be thought of in the following ways:
|
139
|
-
|
140
|
-
```ruby
|
141
|
-
# A Typical read request ( query / filter or detail view )
|
142
|
-
|
143
|
-
response = present( this_resource ) # resource -> filter context
|
144
|
-
.to(this_user) # filter context: relevant for this user
|
145
|
-
.in(this_presentation) # serializer: different slices / renderings
|
146
|
-
|
147
|
-
response.cache_key # russian doll style / max-updated-at friendly
|
148
|
-
response.etag # http client conditional get
|
149
|
-
```
|
150
|
-
|
151
|
-
The filter context and serializer classes make this easy. They also
|
152
|
-
make writing -- or rather, generating -- documentation and tests very
|
153
|
-
easy as well.
|
154
|
-
|
155
|
-
```ruby
|
156
|
-
# Typical mutation request ( create, update, delete )
|
157
|
-
|
158
|
-
outcome = run(this_command)
|
159
|
-
.as(this_user)
|
160
|
-
.against(this_set_of_one_or_more_records)
|
161
|
-
.with(these_arguments)
|
162
|
-
|
163
|
-
outcome.success?
|
164
|
-
|
165
|
-
outcome.error_messages
|
166
|
-
|
167
|
-
outcome.result
|
168
|
-
```
|
169
|
-
|
170
|
-
The command class determines the specifics of the above style of
|
171
|
-
request.
|
172
|
-
|
173
|
-
### The Filter Context
|
174
|
-
|
175
|
-
The filter context system is used to standardize the way we write
|
176
|
-
typical index and show actions in a typical Rails app. A user is
|
177
|
-
requesting to view a set of records, or an individual record.
|
178
|
-
|
179
|
-
Given a user making a request to view a specific resource, we arrive at
|
180
|
-
the 'filter context'. The filter context is responsible for 'scoping' a
|
181
|
-
resource to the set of records that user is permitted to view.
|
182
|
-
|
183
|
-
Based on the combination of parameters used to build that filter, we
|
184
|
-
compute a cache key that simplifies the process of server caching and
|
185
|
-
http client caching at the same time.
|
186
|
-
|
187
|
-
The filter context itself and the available parameters and their allowed
|
188
|
-
values are specified by the DSL, which simplifies the process of writing
|
189
|
-
complex queries and also provides configuration meta-data that aids in
|
190
|
-
the process of developing client user interfaces, API documentation, and
|
191
|
-
test code.
|
192
|
-
|
193
|
-
### Commands
|
194
|
-
|
195
|
-
The command class allows you to declare the available parameters, the
|
196
|
-
required values, their data types, etc. It also allows you to declare
|
197
|
-
which users can run the command, and further restrict the parameters
|
198
|
-
allowed and the values they accept.
|
199
|
-
|
200
|
-
### Serializers
|
201
|
-
|
202
|
-
- ActiveModel Serializers
|
203
|
-
- Documentation DSL
|
204
|
-
- Metadata for inspection + documentation generation
|
205
|
-
|
206
|
-
## API Documentation & Integration Tests
|
207
|
-
|
208
|
-
- rspec_api_documentation gem
|
209
|
-
- plan: take advantage of metadata defined above to auto-generate
|
210
|
-
documentation with the ability to pass expectation blocks as pass /
|
211
|
-
fail indicators
|
data/datapimp.gemspec
CHANGED
@@ -18,11 +18,11 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
19
19
|
|
20
20
|
spec.add_dependency 'pry'
|
21
|
-
spec.add_dependency 'hashie'
|
21
|
+
spec.add_dependency 'hashie'
|
22
22
|
spec.add_dependency 'colored'
|
23
23
|
spec.add_dependency 'commander'
|
24
24
|
spec.add_dependency 'fog-aws'
|
25
|
-
spec.add_dependency 'dropbox-api'
|
25
|
+
spec.add_dependency 'dropbox-api', '> 0.4.3'
|
26
26
|
spec.add_dependency 'google_drive'
|
27
27
|
spec.add_dependency 'rack-contrib'
|
28
28
|
spec.add_dependency 'uri_template'
|
data/lib/datapimp/cli/config.rb
CHANGED
@@ -1,3 +1,20 @@
|
|
1
|
+
command 'config' do |c|
|
2
|
+
c.syntax = "datapimp config [OPTIONS]"
|
3
|
+
c.description = "Shows the configuration options being used"
|
4
|
+
|
5
|
+
c.option '--env', "Output compatible with .env files"
|
6
|
+
|
7
|
+
c.action do |args, options|
|
8
|
+
Datapimp.config.current.to_hash.each do |key, value|
|
9
|
+
if options.env
|
10
|
+
puts "#{ key.to_s.upcase }= '#{ value }'"
|
11
|
+
else
|
12
|
+
puts "#{key}: #{value}"
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
1
18
|
command 'config set' do |c|
|
2
19
|
c.syntax = 'datapimp config set KEY=VALUE KEY=VALUE [options]'
|
3
20
|
c.description = 'manipulate configuration settings'
|
@@ -0,0 +1,15 @@
|
|
1
|
+
command "list spreadsheets" do |c|
|
2
|
+
c.syntax = "datapimp list spreadsheets"
|
3
|
+
c.description = "list the spreadsheets which can be used as datasources"
|
4
|
+
|
5
|
+
c.option '--type TYPE', String, "What type of source data is this? #{ Datapimp::Sync.data_source_types.join(", ") }"
|
6
|
+
|
7
|
+
Datapimp::Cli.accepts_keys_for(c, :google, :dropbox)
|
8
|
+
|
9
|
+
c.action do |args, options|
|
10
|
+
Datapimp::Sync.google.spreadsheets.each do |sheet|
|
11
|
+
puts "#{ sheet.key }\t\t#{ sheet.title }"
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module Datapimp::DataSync
|
2
|
+
def self.sync_google_spreadsheet(options, *args)
|
3
|
+
require 'google_drive' unless defined?(::GoogleDrive)
|
4
|
+
|
5
|
+
raise 'Must setup google client' unless Datapimp::Sync.google.spreadsheets
|
6
|
+
|
7
|
+
key = args.shift
|
8
|
+
name = args.shift || "Spreadsheet"
|
9
|
+
|
10
|
+
raise 'Must supply a spreadsheet key' unless key
|
11
|
+
|
12
|
+
spreadsheet = Datapimp::Sources::GoogleSpreadsheet.new(name, key: key)
|
13
|
+
|
14
|
+
if options.output
|
15
|
+
Pathname(options.output).open("w+") do |fh|
|
16
|
+
fh.write(spreadsheet.to_s)
|
17
|
+
end
|
18
|
+
else
|
19
|
+
puts spreadsheet.to_s
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
data/lib/datapimp/cli/sync.rb
CHANGED
@@ -3,11 +3,23 @@ command "sync folder" do |c|
|
|
3
3
|
c.syntax = "datapimp sync folder LOCAL_PATH REMOTE_PATH [OPTIONS]"
|
4
4
|
|
5
5
|
c.option '--type TYPE', String, "Which service is hosting the folder"
|
6
|
+
c.option '--action ACTION', String, "Which sync action to run? push, pull"
|
6
7
|
|
7
8
|
Datapimp::Cli.accepts_keys_for(c, :amazon, :google, :github, :dropbox)
|
8
9
|
|
9
10
|
c.action do |args, options|
|
11
|
+
options.default(action:"pull", type: "dropbox")
|
10
12
|
|
13
|
+
local, remote = args
|
14
|
+
|
15
|
+
folder = case
|
16
|
+
when options.type == "dropbox"
|
17
|
+
Datapimp::Sync::DropboxFolder.new(local: local, remote: remote)
|
18
|
+
when options.type == "google"
|
19
|
+
Datapimp::Sync::GoogleDriveFolder.new(local: local, remote: remote)
|
20
|
+
end
|
21
|
+
|
22
|
+
folder.run(options.action)
|
11
23
|
end
|
12
24
|
end
|
13
25
|
|
@@ -20,10 +32,13 @@ command "sync data" do |c|
|
|
20
32
|
c.option '--columns NAMES', Array, "Extract only these columns"
|
21
33
|
|
22
34
|
c.example "Syncing an excel file from dropbox ", "datapimp sync data --type dropbox --columns name,description --dropbox-app-key ABC --dropbox-app-secret DEF --dropbox-client-token HIJ --dropbox-client-secret JKL spreadsheets/test.xslx"
|
35
|
+
c.example "Syncing a google spreadsheet", "datapimp sync data --type google-spreadsheet WHATEVER_THE_KEY_IS"
|
23
36
|
|
24
37
|
Datapimp::Cli.accepts_keys_for(c, :google, :github, :dropbox)
|
25
38
|
|
26
39
|
c.action do |args, options|
|
27
|
-
|
40
|
+
if options.type == "google-spreadsheet"
|
41
|
+
Datapimp::DataSync.sync_google_spreadsheet(options, args)
|
42
|
+
end
|
28
43
|
end
|
29
44
|
end
|
@@ -72,27 +72,6 @@ module Datapimp
|
|
72
72
|
end
|
73
73
|
end
|
74
74
|
|
75
|
-
def sync(local_path, remote_path, options={})
|
76
|
-
sync_folders(local_path, remote_path, options)
|
77
|
-
end
|
78
|
-
|
79
|
-
# Instructs Datapimp to create a syncable folder
|
80
|
-
# between the local and remote path, and optionally
|
81
|
-
# modifies the middleman config for the site
|
82
|
-
def sync_folders(local_path, remote_path, options={})
|
83
|
-
app = options[:app]
|
84
|
-
|
85
|
-
folder = Datapimp::Sync::Folder.new(local_path: local_path,
|
86
|
-
remote_path: remote_path,
|
87
|
-
app: app).synced
|
88
|
-
|
89
|
-
if !!options[:append_config] == true
|
90
|
-
Datapimp.append_config(folder.config_line)
|
91
|
-
end
|
92
|
-
|
93
|
-
folder
|
94
|
-
end
|
95
|
-
|
96
75
|
def method_missing meth, *args, &block
|
97
76
|
if api.respond_to?(meth)
|
98
77
|
return api.send(meth, *args, &block)
|
@@ -0,0 +1,212 @@
|
|
1
|
+
module Datapimp::Sources
|
2
|
+
class GoogleSpreadsheet < Datapimp::Sources::Base
|
3
|
+
requires :key
|
4
|
+
|
5
|
+
attr_accessor :key,
|
6
|
+
:session,
|
7
|
+
:name
|
8
|
+
|
9
|
+
def initialize name, options={}
|
10
|
+
@options = options
|
11
|
+
|
12
|
+
if name.is_a?(GoogleDrive::Spreadsheet)
|
13
|
+
@spreadsheet = name
|
14
|
+
@name = @spreadsheet.title
|
15
|
+
@key = @spreadsheet.key
|
16
|
+
end
|
17
|
+
|
18
|
+
@key ||= options[:key]
|
19
|
+
@session ||= options.fetch(:session) { Datapimp::Sync.google.api }
|
20
|
+
|
21
|
+
ensure_valid_options!
|
22
|
+
end
|
23
|
+
|
24
|
+
def self.create_from_file(path, title)
|
25
|
+
if find_by_title(title)
|
26
|
+
raise 'Spreadsheet with this title already exists'
|
27
|
+
end
|
28
|
+
|
29
|
+
session.upload_from_file(path, title, :content_type => "text/csv")
|
30
|
+
|
31
|
+
find_by_title(title)
|
32
|
+
end
|
33
|
+
|
34
|
+
def self.[](key_or_title)
|
35
|
+
find_by_key(key_or_title) || find_by_title(key_or_title)
|
36
|
+
end
|
37
|
+
|
38
|
+
def self.find_by_key(key)
|
39
|
+
sheet = session_spreadsheets.detect do |spreadsheet|
|
40
|
+
spreadsheet.key == key
|
41
|
+
end
|
42
|
+
|
43
|
+
sheet && new(sheet, session: Datapimp::Sync.google.session)
|
44
|
+
end
|
45
|
+
|
46
|
+
def self.find_by_title title
|
47
|
+
sheet = session_spreadsheets.detect do |spreadsheet|
|
48
|
+
spreadsheet.title.match(title)
|
49
|
+
end
|
50
|
+
|
51
|
+
sheet && new(sheet, session: Datapimp::Sync.google.session)
|
52
|
+
end
|
53
|
+
|
54
|
+
def self.session_spreadsheets
|
55
|
+
@session_spreadsheets ||= Datapimp::Sync.google.api.spreadsheets
|
56
|
+
end
|
57
|
+
|
58
|
+
def self.create_from_data(data, options={})
|
59
|
+
require 'csv'
|
60
|
+
|
61
|
+
headers = Array(options[:headers]).map(&:to_s)
|
62
|
+
|
63
|
+
tmpfile = "tmp-csv.csv"
|
64
|
+
|
65
|
+
CSV.open(tmpfile, "wb") do |csv|
|
66
|
+
csv << headers
|
67
|
+
|
68
|
+
data.each do |row|
|
69
|
+
csv << headers.map do |header|
|
70
|
+
row = row.stringify_keys
|
71
|
+
row[header.to_s]
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
spreadsheet = Datapimp::Sync.google.api.upload_from_file(tmpfile, options[:title], :content_type => "text/csv")
|
77
|
+
|
78
|
+
new(spreadsheet.title, key: spreadsheet.key)
|
79
|
+
end
|
80
|
+
|
81
|
+
|
82
|
+
def title
|
83
|
+
@name ||= spreadsheet.try(:title)
|
84
|
+
end
|
85
|
+
|
86
|
+
def edit_url
|
87
|
+
spreadsheet.human_url
|
88
|
+
end
|
89
|
+
|
90
|
+
def share_write_access_with *emails
|
91
|
+
acl = spreadsheet.acl
|
92
|
+
|
93
|
+
Array(emails).flatten.each do |email|
|
94
|
+
acl.push scope_type: "user",
|
95
|
+
with_key: false,
|
96
|
+
role: "writer",
|
97
|
+
scope: email
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
def share_read_access_with *emails
|
102
|
+
acl = spreadsheet.acl
|
103
|
+
|
104
|
+
Array(emails).flatten.each do |email|
|
105
|
+
acl.push scope_type: "user",
|
106
|
+
with_key: false,
|
107
|
+
role: "reader",
|
108
|
+
scope: email
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
def add_to_collection collection_title
|
113
|
+
collection = if collection_title.is_a?(GoogleDrive::Collection)
|
114
|
+
collection_title
|
115
|
+
else
|
116
|
+
session.collections.find do |c|
|
117
|
+
c.title == collection_title
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
if !collection
|
122
|
+
collection_names = session.collections.map(&:title)
|
123
|
+
raise 'Could not find collection in Google drive. Maybe you mean: ' + collection_names.join(', ')
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
def spreadsheet_key
|
128
|
+
key
|
129
|
+
end
|
130
|
+
|
131
|
+
def stale?
|
132
|
+
(!need_to_refresh? && (age > max_age)) || fresh_on_server?
|
133
|
+
end
|
134
|
+
|
135
|
+
def fresh_on_server?
|
136
|
+
refreshed_at.to_i > 0 && (last_updated_at > refreshed_at)
|
137
|
+
end
|
138
|
+
|
139
|
+
def last_updated_at
|
140
|
+
if value = spreadsheet.document_feed_entry_internal.css('updated').try(:text) rescue nil
|
141
|
+
DateTime.parse(value).to_i
|
142
|
+
else
|
143
|
+
Time.now.to_i
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
def fetch
|
148
|
+
self.raw = process_worksheets
|
149
|
+
end
|
150
|
+
|
151
|
+
def preprocess
|
152
|
+
single? ? raw.values.flatten : raw
|
153
|
+
end
|
154
|
+
|
155
|
+
protected
|
156
|
+
|
157
|
+
def process_worksheets
|
158
|
+
worksheets.inject({}.to_mash) do |memo, parts|
|
159
|
+
k, ws = parts
|
160
|
+
header_row = Array(ws.rows[0])
|
161
|
+
column_names = header_row.map {|cell| "#{ cell }".parameterize.underscore }
|
162
|
+
rows = ws.rows.slice(1, ws.rows.length)
|
163
|
+
|
164
|
+
row_index = 1
|
165
|
+
memo[k] = rows.map do |row|
|
166
|
+
col_index = 0
|
167
|
+
|
168
|
+
_record = column_names.inject({}) do |record, field|
|
169
|
+
record[field] = "#{ row[col_index] }".strip
|
170
|
+
record["_id"] = row_index
|
171
|
+
col_index += 1
|
172
|
+
record
|
173
|
+
end
|
174
|
+
|
175
|
+
row_index += 1
|
176
|
+
|
177
|
+
_record
|
178
|
+
end
|
179
|
+
|
180
|
+
memo
|
181
|
+
end
|
182
|
+
end
|
183
|
+
|
184
|
+
def single?
|
185
|
+
worksheets.length == 1
|
186
|
+
end
|
187
|
+
|
188
|
+
def header_rows_for_worksheet key
|
189
|
+
if key.is_a?(Fixnum)
|
190
|
+
_worksheets[key]
|
191
|
+
else
|
192
|
+
worksheets.fetch(key)
|
193
|
+
end
|
194
|
+
end
|
195
|
+
|
196
|
+
def worksheets
|
197
|
+
@worksheets ||= _worksheets.inject({}.to_mash) do |memo,ws|
|
198
|
+
key = ws.title.strip.downcase.underscore.gsub(/\s+/,'_')
|
199
|
+
memo[key] = ws
|
200
|
+
memo
|
201
|
+
end
|
202
|
+
end
|
203
|
+
|
204
|
+
def _worksheets
|
205
|
+
@_worksheets ||= spreadsheet.worksheets
|
206
|
+
end
|
207
|
+
|
208
|
+
def spreadsheet
|
209
|
+
@spreadsheet ||= session.spreadsheet_by_key(spreadsheet_key)
|
210
|
+
end
|
211
|
+
end
|
212
|
+
end
|
@@ -0,0 +1,198 @@
|
|
1
|
+
# The `Datapimp::Sources` module houses the various
|
2
|
+
# types of remote data stores we are reading and converting into
|
3
|
+
# a JSON array of objects that gets cached on our filesystem.
|
4
|
+
#
|
5
|
+
module Datapimp
|
6
|
+
module Sources
|
7
|
+
class Base
|
8
|
+
attr_reader :options, :name
|
9
|
+
attr_accessor :raw, :processed, :format, :scopes, :slug_column, :refreshed_at, :path
|
10
|
+
|
11
|
+
class << self
|
12
|
+
attr_accessor :required_options
|
13
|
+
end
|
14
|
+
|
15
|
+
def self.requires *args
|
16
|
+
self.required_options = args
|
17
|
+
end
|
18
|
+
|
19
|
+
def initialize(name, options={})
|
20
|
+
@name ||= name
|
21
|
+
@options ||= options
|
22
|
+
@format ||= options.fetch(:format, :json)
|
23
|
+
@path ||= options.fetch(:path) { Pathname(Dir.pwd()) }
|
24
|
+
|
25
|
+
@slug_column = options.fetch(:slug_column, :_id)
|
26
|
+
|
27
|
+
ensure_valid_options!
|
28
|
+
end
|
29
|
+
|
30
|
+
def to_s
|
31
|
+
data.to_json
|
32
|
+
end
|
33
|
+
|
34
|
+
# defines a scope for the records in this data source
|
35
|
+
# a scope is a named filter, implemented in the form of a block
|
36
|
+
# which is passed each record. if the block returns true, it returns
|
37
|
+
# the record:
|
38
|
+
#
|
39
|
+
# Example:
|
40
|
+
#
|
41
|
+
# data_source(:galleries) do
|
42
|
+
# scope :active, -> {|record| record.state == "active" }
|
43
|
+
# end
|
44
|
+
def scope(*args, block)
|
45
|
+
name = args.first
|
46
|
+
(self.scopes ||= {})[name.to_sym] = block
|
47
|
+
end
|
48
|
+
|
49
|
+
def has_scope?(scope_name)
|
50
|
+
scope_name && (self.scopes ||= {}).key?(scope_name.to_sym)
|
51
|
+
end
|
52
|
+
|
53
|
+
# compute properties takes the raw data of each record
|
54
|
+
# and sets additional properties on the records which may
|
55
|
+
# not be persisted in the data source
|
56
|
+
def compute_properties
|
57
|
+
self.processed && self.processed.map! do |row|
|
58
|
+
if slug_column && row.respond_to?(slug_column)
|
59
|
+
row.slug = row.send(slug_column).to_s.parameterize
|
60
|
+
end
|
61
|
+
|
62
|
+
row
|
63
|
+
end
|
64
|
+
|
65
|
+
processors.each do |processor|
|
66
|
+
original = self.processed.dup
|
67
|
+
modified = []
|
68
|
+
|
69
|
+
original.each_with_index do |record, index|
|
70
|
+
previous = original[index - 1]
|
71
|
+
modified.push(processor.call(record, index, previous: previous, set: original))
|
72
|
+
end
|
73
|
+
|
74
|
+
self.processed = modified
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
def processors &block
|
79
|
+
@processors ||= []
|
80
|
+
@processors << block if block_given?
|
81
|
+
@processors
|
82
|
+
end
|
83
|
+
|
84
|
+
# makes sure that the required options for this data source
|
85
|
+
# are passed for any instance of the data source
|
86
|
+
def ensure_valid_options!
|
87
|
+
missing_options = (Array(self.class.required_options) - options.keys.map(&:to_sym))
|
88
|
+
|
89
|
+
missing_options.reject! do |key|
|
90
|
+
respond_to?(key) && !send(key).nil?
|
91
|
+
end
|
92
|
+
|
93
|
+
if missing_options.length > 0
|
94
|
+
raise 'Error: failure to supply the following options: ' + missing_options.map(&:to_s).join(",")
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
def select(&block)
|
99
|
+
data.select(&block)
|
100
|
+
end
|
101
|
+
|
102
|
+
def refresh
|
103
|
+
fetch
|
104
|
+
process
|
105
|
+
self.refreshed_at = Time.now.to_i
|
106
|
+
self
|
107
|
+
end
|
108
|
+
|
109
|
+
def refresh_if_stale?
|
110
|
+
refresh! if stale?
|
111
|
+
end
|
112
|
+
|
113
|
+
# A data source is stale if it has been populated
|
114
|
+
# and the age is greater than the max age we allow.
|
115
|
+
def stale?
|
116
|
+
!need_to_refresh? && (age > max_age)
|
117
|
+
end
|
118
|
+
|
119
|
+
def fresh_on_server?
|
120
|
+
need_to_refresh?
|
121
|
+
end
|
122
|
+
|
123
|
+
def max_age
|
124
|
+
max = ENV['MAX_DATA_SOURCE_AGE']
|
125
|
+
(max && max.to_i) || 120
|
126
|
+
end
|
127
|
+
|
128
|
+
# how long since this data source has been refreshed?
|
129
|
+
def age
|
130
|
+
Time.now.to_i - refreshed_at.to_i
|
131
|
+
end
|
132
|
+
|
133
|
+
def data
|
134
|
+
refresh if need_to_refresh?
|
135
|
+
processed
|
136
|
+
end
|
137
|
+
|
138
|
+
def refresh!
|
139
|
+
refresh
|
140
|
+
save_to_disk
|
141
|
+
end
|
142
|
+
|
143
|
+
def need_to_refresh?
|
144
|
+
!(@fetched && @_processed)
|
145
|
+
end
|
146
|
+
|
147
|
+
def fetch
|
148
|
+
@fetched = true
|
149
|
+
self.raw = []
|
150
|
+
end
|
151
|
+
|
152
|
+
def preprocess
|
153
|
+
self.raw.dup
|
154
|
+
end
|
155
|
+
|
156
|
+
def process
|
157
|
+
@_processed = true
|
158
|
+
self.processed = preprocess
|
159
|
+
# set_id
|
160
|
+
compute_properties
|
161
|
+
self.processed
|
162
|
+
end
|
163
|
+
|
164
|
+
def refreshed_at
|
165
|
+
return @refreshed_at if @refreshed_at.to_i > 0
|
166
|
+
|
167
|
+
if path_to_file.exist?
|
168
|
+
@refreshed_at = File.mtime(path.join(file)).to_i
|
169
|
+
end
|
170
|
+
end
|
171
|
+
|
172
|
+
def save_to_disk
|
173
|
+
unless path_to_file.dirname.exist?
|
174
|
+
FileUtils.mkdir(path_to_file.dirname)
|
175
|
+
end
|
176
|
+
|
177
|
+
path_to_file.open('w+') {|fh| fh.write(to_s) }
|
178
|
+
end
|
179
|
+
|
180
|
+
def persisted?
|
181
|
+
path_to_file && path_to_file.exist?
|
182
|
+
end
|
183
|
+
|
184
|
+
def file
|
185
|
+
@file ||= name.parameterize if name.respond_to?(:parameterize)
|
186
|
+
@file.gsub!("-","_")
|
187
|
+
@file = "#{@file}.json" unless @file.match(/\.json/i)
|
188
|
+
@file
|
189
|
+
end
|
190
|
+
|
191
|
+
def path_to_file
|
192
|
+
Pathname(path).join("#{ file }")
|
193
|
+
end
|
194
|
+
end
|
195
|
+
end
|
196
|
+
end
|
197
|
+
|
198
|
+
Dir[Datapimp.lib.join("datapimp/sources/**/*.rb")].each {|f| require(f) }
|
@@ -1,10 +1,37 @@
|
|
1
1
|
module Datapimp
|
2
|
-
class Sync::DropboxFolder <
|
3
|
-
|
2
|
+
class Sync::DropboxFolder < Hashie::Mash
|
3
|
+
def dropbox
|
4
|
+
@dropbox ||= Datapimp::Sync.dropbox
|
5
|
+
end
|
6
|
+
|
7
|
+
def delta
|
8
|
+
@delta ||= dropbox.delta(cursor, remote_path)
|
9
|
+
end
|
10
|
+
|
11
|
+
def local_path
|
12
|
+
Pathname(local)
|
13
|
+
end
|
14
|
+
|
15
|
+
def remote_path
|
16
|
+
Datapimp::Sync.dropbox.ls(remote)
|
17
|
+
end
|
18
|
+
|
19
|
+
def cursor
|
20
|
+
cursor_path.exist? && cursor_path.read
|
21
|
+
end
|
22
|
+
|
23
|
+
def cursor_path
|
24
|
+
local_path.join('.dropbox-cursor')
|
25
|
+
end
|
26
|
+
|
27
|
+
def run(action)
|
28
|
+
action = action.to_sym
|
29
|
+
|
30
|
+
if action == :push
|
4
31
|
|
32
|
+
elsif action == :pull
|
5
33
|
|
6
|
-
|
7
|
-
Datapimp.dropbox(token: client_token, secret: client_secret)
|
34
|
+
end
|
8
35
|
end
|
9
36
|
end
|
10
37
|
end
|
data/lib/datapimp/version.rb
CHANGED
data/lib/datapimp.rb
CHANGED
@@ -3,6 +3,7 @@ require 'set'
|
|
3
3
|
require 'pathname'
|
4
4
|
require 'hashie'
|
5
5
|
require 'datapimp/core_ext'
|
6
|
+
require 'active_support'
|
6
7
|
|
7
8
|
module Datapimp
|
8
9
|
def self.config
|
@@ -29,5 +30,7 @@ end
|
|
29
30
|
|
30
31
|
require 'datapimp/version'
|
31
32
|
require 'datapimp/configuration'
|
32
|
-
require 'datapimp/data_sources'
|
33
|
+
require 'datapimp/sources'
|
33
34
|
require 'datapimp/sync'
|
35
|
+
require 'datapimp/sync/dropbox_folder'
|
36
|
+
require 'datapimp/sync/google_drive_folder'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datapimp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonathan Soeder
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0
|
33
|
+
version: '0'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 0
|
40
|
+
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: colored
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -84,16 +84,16 @@ dependencies:
|
|
84
84
|
name: dropbox-api
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
|
-
- - "
|
87
|
+
- - ">"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version:
|
89
|
+
version: 0.4.3
|
90
90
|
type: :runtime
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
|
-
- - "
|
94
|
+
- - ">"
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version:
|
96
|
+
version: 0.4.3
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: google_drive
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -296,7 +296,9 @@ files:
|
|
296
296
|
- lib/datapimp/cli.rb
|
297
297
|
- lib/datapimp/cli/01_extensions.rb
|
298
298
|
- lib/datapimp/cli/config.rb
|
299
|
+
- lib/datapimp/cli/list.rb
|
299
300
|
- lib/datapimp/cli/setup.rb
|
301
|
+
- lib/datapimp/cli/support/data_sync.rb
|
300
302
|
- lib/datapimp/cli/sync.rb
|
301
303
|
- lib/datapimp/clients/amazon.rb
|
302
304
|
- lib/datapimp/clients/dropbox.rb
|
@@ -304,13 +306,13 @@ files:
|
|
304
306
|
- lib/datapimp/clients/google.rb
|
305
307
|
- lib/datapimp/configuration.rb
|
306
308
|
- lib/datapimp/core_ext.rb
|
307
|
-
- lib/datapimp/data_sources.rb
|
308
|
-
- lib/datapimp/data_sources/dropbox.rb
|
309
|
-
- lib/datapimp/data_sources/excel.rb
|
310
|
-
- lib/datapimp/data_sources/github.rb
|
311
|
-
- lib/datapimp/data_sources/google.rb
|
312
|
-
- lib/datapimp/data_sources/json.rb
|
313
|
-
- lib/datapimp/data_sources/nokogiri.rb
|
309
|
+
- lib/datapimp/sources.rb
|
310
|
+
- lib/datapimp/sources/dropbox.rb
|
311
|
+
- lib/datapimp/sources/excel.rb
|
312
|
+
- lib/datapimp/sources/github.rb
|
313
|
+
- lib/datapimp/sources/google.rb
|
314
|
+
- lib/datapimp/sources/json.rb
|
315
|
+
- lib/datapimp/sources/nokogiri.rb
|
314
316
|
- lib/datapimp/sync.rb
|
315
317
|
- lib/datapimp/sync/dropbox_delta.rb
|
316
318
|
- lib/datapimp/sync/dropbox_folder.rb
|
@@ -1,10 +0,0 @@
|
|
1
|
-
# The `Datapimp::DataSources` module houses the various
|
2
|
-
# types of remote data stores we are reading and converting into
|
3
|
-
# a JSON array of objects that gets cached on our filesystem.
|
4
|
-
module Datapimp
|
5
|
-
module DataSources
|
6
|
-
|
7
|
-
end
|
8
|
-
end
|
9
|
-
|
10
|
-
Dir[Datapimp.lib.join("datapimp/data_sources/**/*.rb")].each {|f| require(f) }
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|