ferry 0.0.6 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +0 -2
- data/README.md +23 -202
- data/Rakefile +2 -2
- data/bin/ferry +3 -10
- data/ferry.gemspec +3 -6
- data/lib/ferry.rb +224 -83
- data/lib/ferry/version.rb +1 -1
- data/spec/{ferry_spec.rb → lib/ferry_spec.rb} +1 -2
- data/spec/spec_helper.rb +2 -80
- data/spec/support/data.rb +10 -10
- data/spec/support/models.rb +1 -2
- data/spec/support/schema.rb +14 -15
- metadata +4 -22
- data/dbfile.sqlite3 +0 -0
- data/lib/ferry/engine.rb +0 -16
- data/lib/ferry/logger.rb +0 -12
- data/lib/tasks/dump_task.rake +0 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 725496d9f272e4289334f35cd5f29dfb877fa77e
|
4
|
+
data.tar.gz: 62516d8f0982f15ebc85c52ea784b4e9e768b7d1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2170274c90daecbb1874cb4a498aaf7cbe7beb9a9342f5efdb088527aac875bc77960cb71855a40bb94bbbe9ae7c4166d5f134cba1ee930ceaa68f0ca5d30248
|
7
|
+
data.tar.gz: d0b93db1a4a7f403ab6beb891f2b8bf53711b2c2672d1ff198a9d683326107dbb8e75438ca0a4aafa0b32e067fb3bf9d853ff07d478cec509493080fa99ce45f
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,23 +1,35 @@
|
|
1
1
|
# Ferry
|
2
2
|
|
3
|
+
## What is Ferry?
|
3
4
|
Ferry is a data migration and data manipulation tool that seeks to quickly and easily reduce overhead when dealing with big data problems.
|
4
5
|
|
5
|
-
##
|
6
|
+
## What can I use Ferry for? (Use Cases)
|
7
|
+
See the [ferry_demo](http://github.com/cmu-is-projects/ferry_demo.com) ROR/Sqlite app for guidance on using Ferry!
|
6
8
|
|
9
|
+
Manipulation Use Cases
|
10
|
+
- RESTful column/ row interaction
|
11
|
+
|
12
|
+
Migration
|
13
|
+
- Exporting data to various file formats (.csv, .sql, .yml)
|
14
|
+
- Importing data from various file formats
|
15
|
+
- Migrating data to third party hosts (Amazon S3, Oracle)
|
16
|
+
- Migrating data to a different database
|
17
|
+
|
18
|
+
## TO-DOs
|
7
19
|
- [ ] Refactoring before public release
|
8
|
-
|
9
|
-
|
10
|
-
|
20
|
+
- [x] Define action-items for refactor
|
21
|
+
- [x] Provide working example(s) of using ferry (ferry_demo app)
|
22
|
+
- [ ] CLI tool
|
23
|
+
- [ ] Simple CSV export
|
24
|
+
- [ ] using sqlite3
|
25
|
+
- [ ] using psql
|
26
|
+
- [ ] RESTful column interaction
|
27
|
+
- [ ] Understanding relationships between generating migrations and migration files in place
|
11
28
|
- [ ] Tests
|
12
|
-
|
13
|
-
- [ ]
|
14
|
-
- [ ] Migration Scenarios - dummy class migration
|
15
|
-
- [ ] Refactor logging logic into Logger class
|
16
|
-
- [x] Initial revision
|
17
|
-
- [ ] Review
|
29
|
+
- [ ] Rolling back on errors / mishaps during migrations and manipulations
|
30
|
+
- [ ] Host documentation site via GitHub pages
|
18
31
|
|
19
32
|
## Installation
|
20
|
-
|
21
33
|
Add this line to your application's Gemfile:
|
22
34
|
|
23
35
|
gem 'ferry'
|
@@ -30,197 +42,6 @@ Or install it yourself as:
|
|
30
42
|
|
31
43
|
$ gem install ferry
|
32
44
|
|
33
|
-
## Usage
|
34
|
-
|
35
|
-
Usage pending. See examples / submit PR's for your ideas.
|
36
|
-
|
37
|
-
## Example(s)
|
38
|
-
###### 3 September 2014
|
39
|
-
Use Case Ideas
|
40
|
-
|
41
|
-
Note: Demo app can initially function with RoR and Postgres.
|
42
|
-
|
43
|
-
Manipulation Use Cases
|
44
|
-
- CRUD for Columns
|
45
|
-
- Copy & Paste Columns
|
46
|
-
- CRUD for Rows
|
47
|
-
- Understanding relationships between generating migrations and migration files in place
|
48
|
-
|
49
|
-
Migration
|
50
|
-
- Exporting data to various file formats (.csv, .sql, .yml)
|
51
|
-
- Importing data from various file formats
|
52
|
-
- Migrating data to third party hosts (Amazon S3, Oracle)
|
53
|
-
- Migrating data to a different database
|
54
|
-
|
55
|
-
Important things to consider and remember
|
56
|
-
- Rolling back on errors / mishaps during migrations and manipulations
|
57
|
-
- Host documentation site via GitHub pages
|
58
|
-
|
59
|
-
|
60
|
-
###### 30 August 2014
|
61
|
-
Below is an initial implementation of how ferry will work
|
62
|
-
|
63
|
-
```
|
64
|
-
# encoding: UTF-8
|
65
|
-
require 'consortium'
|
66
|
-
|
67
|
-
task :load_wm_design do
|
68
|
-
class WmDesign < Design
|
69
|
-
self.table_name = :wm_design
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
|
-
namespace :consortium_example do
|
74
|
-
desc "writes design cigs to individual xml files using consortium"
|
75
|
-
task :write_local => [:load_wm_design] do
|
76
|
-
hostname = Socket.gethostname
|
77
|
-
FileUtils.mkdir "consortium_migration_#{hostname}" unless Dir["consortium_migration_#{hostname}"].present?
|
78
|
-
homedir = "consortium_migration_#{hostname}"
|
79
|
-
|
80
|
-
range = Design.where("savedate > ?", 15.hours.ago.strftime("%d.%m.%Y %H").to_datetime)
|
81
|
-
|
82
|
-
consortium_runtime = Benchmark.measure do
|
83
|
-
range.migrate({max_workers: 4, batch_size: 500}) do |collection|
|
84
|
-
collection.each do |design|
|
85
|
-
cons_place_design_content_in_batch(design, homedir, design.composite_id)
|
86
|
-
end
|
87
|
-
end
|
88
|
-
end
|
89
|
-
puts "#{consortium_runtime}"
|
90
|
-
end
|
91
|
-
|
92
|
-
private
|
93
|
-
|
94
|
-
def cons_place_design_content_in_batch(design, homedir, composite_id)
|
95
|
-
begin
|
96
|
-
create_xml_file(homedir, composite_id, design)
|
97
|
-
rescue Exception => e
|
98
|
-
File.rename("#{homedir}/#{composite_id}.xml", "#{homedir}/#{composite_id}.xml.failed")
|
99
|
-
raise e
|
100
|
-
end
|
101
|
-
end
|
102
|
-
|
103
|
-
def create_xml_file(homedir, composite_id, design)
|
104
|
-
design.updated_at ? updated_at = design.updated_at.to_time : updated_at = design.created_at.to_time
|
105
|
-
FileUtils.touch "#{homedir}/#{composite_id}.xml"
|
106
|
-
file = File.open("#{homedir}/#{composite_id}.xml", 'w')
|
107
|
-
file.puts design.content
|
108
|
-
file.close
|
109
|
-
FileUtils.touch "#{homedir}/#{composite_id}.xml", :mtime => updated_at
|
110
|
-
end
|
111
|
-
end
|
112
|
-
```
|
113
|
-
|
114
|
-
###### 29 July 2014
|
115
|
-
Version 0.0.1 is functional with the rake task defined here :: https://github.com/customink/design_content_migration/blob/master/lib/tasks/ferry_example.rake#L10
|
116
|
-
|
117
|
-
Please manually install ferry from your locally cloned repo ...
|
118
|
-
```
|
119
|
-
git clone git@github.com:customink/ferry.git
|
120
|
-
cd ferry
|
121
|
-
gem build ferry.gemspec
|
122
|
-
gem install ferry
|
123
|
-
```
|
124
|
-
add it to your app's Gemfile
|
125
|
-
```
|
126
|
-
gem 'ferry'
|
127
|
-
```
|
128
|
-
and then
|
129
|
-
```
|
130
|
-
bundle install
|
131
|
-
```
|
132
|
-
as it has not been pushed to rubygems.com yet.
|
133
|
-
|
134
|
-
Tests - Coming soon to an editor near me!
|
135
|
-
|
136
|
-
###### 28 July 2014
|
137
|
-
Ferry should not support Oracle.
|
138
|
-
|
139
|
-
###### 25 July 2014
|
140
|
-
After a few more reviews with @metaskills, @gilr00y, @jdlehman, and @danielwheeler1987, Ferry will extend ActiveRecord with a "migrate" (more legit name search still in naming progress) method. From there we are going to pass the same relation to find in batches to a worker which will plow through the batch passed to it via a yield call from the task.
|
141
|
-
|
142
|
-
Tests will include; validate the data passed into the worker (log) and testing that there is an ActiveRecord::Relation being passed to find_in_batches.
|
143
|
-
|
144
|
-
###### 23 July 2014
|
145
|
-
After a few chats with @gilr00y and @jdlehman Ferry may extend ActiveRecord with a "migrate" method we could call on an ActiveRecord object. From there that object would call an Engine instance with appropriate fields to kickoff the actual data migration.
|
146
|
-
|
147
|
-
There is some logic duplication and layer duplication between the Engine class and the "migrate" method that extends ActiveRecord. Still working out how to concisely write logic that handles the management of forking connection and engine init calls.
|
148
|
-
|
149
|
-
```
|
150
|
-
require "ferry/version"
|
151
|
-
require 'models/engine'
|
152
|
-
require 'models/logger'
|
153
|
-
|
154
|
-
module Ferry
|
155
|
-
class ActiveRecord
|
156
|
-
def self.migrate(&block)
|
157
|
-
yield
|
158
|
-
end
|
159
|
-
end
|
160
|
-
end
|
161
|
-
```
|
162
|
-
|
163
|
-
This implementation should be able to run something like this ...
|
164
|
-
|
165
|
-
```
|
166
|
-
engine = Engine.new(
|
167
|
-
Design.where("savedate > ?", 6.months.ago.strftime("%d.%m.%Y %H").to_datetime).id,
|
168
|
-
Design.where("savedate > ?", 3.months.ago.strftime("%d.%m.%Y %H").to_datetime).id,
|
169
|
-
100_000,
|
170
|
-
1_000,
|
171
|
-
"log/ferry"
|
172
|
-
)
|
173
|
-
|
174
|
-
Design.where("savedate > ?", 130.hours.ago.strftime("%d.%m.%Y %H").to_datetime).migrate(
|
175
|
-
engine.run do | start_id, end_id, chunk_size, batch_size, log |
|
176
|
-
worker.run do | start_id, chunk_size, batch_size, log |
|
177
|
-
worker_end_id = start_id + chunk_size - 1
|
178
|
-
Design.where("id >= ? && id <= ?", start_id, worker_end_id).find_in_batches(batch_size: batch_size) do |batch|
|
179
|
-
# move and manipulate data as you please
|
180
|
-
end
|
181
|
-
start_id += batch_size
|
182
|
-
end
|
183
|
-
end
|
184
|
-
)
|
185
|
-
|
186
|
-
```
|
187
|
-
|
188
|
-
###### 22 July 2014
|
189
|
-
After installing ferry to your local machine or bundling from your gemfile - in your migration task make sure to define your chunker as such ...
|
190
|
-
|
191
|
-
```
|
192
|
-
require 'ferry'
|
193
|
-
|
194
|
-
namespace :example do
|
195
|
-
task "my_migration_task" do
|
196
|
-
|
197
|
-
ferry = Engine.new(
|
198
|
-
:max_workers => number_of_workers ex:8,
|
199
|
-
:start_id => where_are_we_starting ex:2910, Model.first.id,
|
200
|
-
:end_id => where_are_we_ending ex:8190, Model.last.id,
|
201
|
-
:chunk_size => size_of_chunks_that_workers_will_process ex:42,
|
202
|
-
:working_dir => ex:"path/to/working_dir"
|
203
|
-
)
|
204
|
-
|
205
|
-
ferry.run do |start_id, chunk_size, log|
|
206
|
-
begin
|
207
|
-
work = Model.select(":id").where("? <= id and id < ?", start_id, start_id + chunk_size)
|
208
|
-
rows_to_process = rel.count
|
209
|
-
log.puts("rows_to_process: #{rows_to_process}")
|
210
|
-
work.find_in_batches(:batch_size => 1_000) do
|
211
|
-
# doing things and logging stuff as you please ...
|
212
|
-
end
|
213
|
-
rescue Exception => e
|
214
|
-
log.puts "Broken on id #{id}"
|
215
|
-
raise e
|
216
|
-
end
|
217
|
-
end
|
218
|
-
|
219
|
-
end
|
220
|
-
end
|
221
|
-
```
|
222
|
-
|
223
|
-
|
224
45
|
## Contributing
|
225
46
|
|
226
47
|
1. Fork it ( https://github.com/[my-github-username]/ferry/fork )
|
data/Rakefile
CHANGED
data/bin/ferry
CHANGED
@@ -1,12 +1,5 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
|
3
2
|
require 'ferry'
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
#ARGV[0] should be the ferry function they want to call
|
8
|
-
#subsequent params are the options to that function
|
9
|
-
|
10
|
-
if(ARGV[0] == "to_csv" )
|
11
|
-
export.to_csv
|
12
|
-
end
|
3
|
+
exporter = Ferry::Exporter.new
|
4
|
+
exporter.to_csv if ARGV[0] == 'to_csv'
|
5
|
+
exporter.to_new_db_type if ARGV[0] == 'to_new_db_type'
|
data/ferry.gemspec
CHANGED
@@ -15,20 +15,17 @@ Gem::Specification.new do |spec|
|
|
15
15
|
|
16
16
|
spec.files = `git ls-files -z`.split("\x0")
|
17
17
|
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
|
-
# spec.executables = ["ferry"]
|
19
18
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
20
19
|
spec.require_paths = ["lib"]
|
21
20
|
|
22
21
|
spec.add_development_dependency "activerecord"
|
23
22
|
spec.add_development_dependency "bundler", "~> 1.6"
|
24
|
-
spec.add_development_dependency "progressbar"
|
23
|
+
# spec.add_development_dependency "progressbar"
|
25
24
|
spec.add_development_dependency "rake"
|
26
25
|
spec.add_development_dependency "minitest"
|
27
26
|
spec.add_development_dependency "rspec"
|
28
|
-
# spec.add_development_dependency "yaml"
|
29
|
-
#to test db access
|
30
27
|
spec.add_development_dependency "pg"
|
31
28
|
spec.add_development_dependency "sqlite3"
|
32
|
-
|
33
|
-
#spec.
|
29
|
+
# need to look over installation problems with mysql :(
|
30
|
+
# spec.add_development_dependency "mysql"
|
34
31
|
end
|
data/lib/ferry.rb
CHANGED
@@ -1,118 +1,259 @@
|
|
1
|
-
require "ferry/version"
|
2
|
-
require "ferry/engine"
|
3
|
-
require "ferry/logger"
|
4
|
-
require "csv"
|
5
1
|
require 'active_record'
|
6
|
-
require '
|
2
|
+
require 'csv'
|
3
|
+
require 'ferry/version'
|
4
|
+
# require 'progressbar'
|
7
5
|
require 'yaml'
|
8
6
|
|
9
7
|
module Ferry
|
8
|
+
class Exporter
|
10
9
|
|
10
|
+
def which_db_env
|
11
|
+
ARGV[1]
|
12
|
+
end
|
11
13
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
# ActiveRecord::Base.connection
|
16
|
-
#ActiveRecord::Base.establish_connection(adapter: 'sqlite3', database: 'db/development.sqlite3') #need to automatically get db name
|
17
|
-
#puts ActiveRecord::Base.configurations[Rails.env]['adapter']
|
18
|
-
|
19
|
-
info = YAML::load(IO.read("config/database.yml")) #this holds all the db config information. pretty much a rosetta stone for dbs
|
20
|
-
db_type = info["production"]["adapter"] #this tells us the db rails is using
|
14
|
+
def switch_to_db_type
|
15
|
+
ARGV[2]
|
16
|
+
end
|
21
17
|
|
18
|
+
def to_csv
|
19
|
+
info = YAML::load(IO.read("config/database.yml"))
|
20
|
+
db_type = info[which_db_env||"production"]["adapter"]
|
22
21
|
|
23
|
-
# puts Rails.configuration#.database_configuration[Rails.env]
|
24
|
-
# puts ActiveRecord::Base.configurations[Rails.env]
|
25
22
|
|
26
|
-
# type = db_type.downcase
|
27
23
|
|
24
|
+
#issues:
|
25
|
+
# csv placement in directory: do we want it in the lib?
|
26
|
+
# the case where dev/test dbs are different from production db (sqlite for dev/test, pg for prod??)
|
27
|
+
# empty ARGs
|
28
|
+
# ARGs with invalid values
|
28
29
|
|
29
30
|
|
30
31
|
case db_type
|
31
|
-
|
32
|
-
|
33
|
-
|
32
|
+
when "sqlite3"
|
33
|
+
puts "operating with sqlite3"
|
34
34
|
|
35
|
+
if(which_db_env)
|
36
|
+
homedir = "lib/ferry_to_csv_#{which_db_env}"
|
37
|
+
ActiveRecord::Base.establish_connection(adapter: db_type, database: info[which_db_env]['database'])
|
38
|
+
puts "connected to #{which_db_env} env db"
|
39
|
+
FileUtils.mkdir homedir unless Dir[homedir].present?
|
40
|
+
puts "exporting tables to #{homedir}"
|
41
|
+
# sqlite_pbar = ProgressBar.new("sqlite_to_csv", 100)
|
42
|
+
ActiveRecord::Base.connection.tables.each do |model|
|
43
|
+
full_table = ActiveRecord::Base.connection.execute("SELECT * FROM #{model};")
|
44
|
+
# do not create a csv for an empty table
|
45
|
+
if !full_table[0].nil?
|
46
|
+
CSV.open("#{homedir}/#{model}.csv", "w") do |csv|
|
47
|
+
size = full_table[0].length / 2
|
48
|
+
keys = full_table[0].keys.first(size)
|
49
|
+
#first row contains column names
|
50
|
+
csv << keys
|
51
|
+
full_table.each do |row|
|
52
|
+
csv << row.values_at(*keys)
|
53
|
+
# sqlite_pbar.inc
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
else #no db_env is specified
|
35
59
|
info.keys.each do |environment|
|
36
60
|
|
37
|
-
|
38
|
-
|
61
|
+
if(environment == 'default') #in Rails 4.1+ environments inherit from default, which does not have database so we will not include it
|
62
|
+
next
|
63
|
+
end
|
64
|
+
|
65
|
+
homedir = "lib/ferry_to_csv_#{environment}"
|
66
|
+
ActiveRecord::Base.establish_connection(adapter: db_type, database: info[environment]['database']) #connect to sqlite3 file
|
67
|
+
puts "connected to #{environment} env db"
|
68
|
+
FileUtils.mkdir homedir unless Dir[homedir].present?
|
69
|
+
puts "exporting tables to #{homedir}"
|
70
|
+
# sqlite_pbar = ProgressBar.new("sqlite_to_csv", 100)
|
71
|
+
ActiveRecord::Base.connection.tables.each do |model| #for each model in the db
|
72
|
+
full_table = ActiveRecord::Base.connection.execute("SELECT * FROM #{model};") #get all the records
|
73
|
+
if !full_table[0].nil?
|
74
|
+
CSV.open("#{homedir}/#{model}.csv", "w") do |csv|
|
75
|
+
size = full_table[0].length / 2
|
76
|
+
keys = full_table[0].keys.first(size)
|
77
|
+
#first row contains column names
|
78
|
+
csv << keys
|
79
|
+
full_table.each do |row|
|
80
|
+
csv << row.values_at(*keys)
|
81
|
+
# sqlite_pbar.inc
|
82
|
+
end
|
83
|
+
end
|
39
84
|
end
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
40
88
|
|
41
|
-
|
89
|
+
when "postgresql"
|
90
|
+
puts "operating with postgres"
|
42
91
|
|
43
|
-
|
44
|
-
|
45
|
-
|
92
|
+
if(which_db_env)
|
93
|
+
homedir = "lib/ferry_to_csv_#{which_db_env}"
|
94
|
+
ActiveRecord::Base.establish_connection(
|
95
|
+
adapter: 'postgresql',
|
96
|
+
host: info[which_db_env]['host'] || 'localhost',
|
97
|
+
username: info[which_db_env]['username'],
|
98
|
+
password: info[which_db_env]['password'],
|
99
|
+
database: info[which_db_env]['database'],
|
100
|
+
encoding: info[which_db_env]['encoding']
|
101
|
+
)
|
102
|
+
puts "connected to #{which_db_env} env db"
|
103
|
+
FileUtils.mkdir homedir unless Dir[homedir].present?
|
104
|
+
puts "exporting tables to #{homedir}"
|
105
|
+
# psql_pbar = ProgressBar.new("psql_to_csv", 100)
|
106
|
+
ActiveRecord::Base.connection.tables.each do |model|
|
107
|
+
full_table = ActiveRecord::Base.connection.execute("SELECT * FROM #{model};")
|
108
|
+
# do not create a csv for an empty table
|
109
|
+
if full_table.num_tuples > 0
|
110
|
+
CSV.open("#{homedir}/#{model}.csv", "w") do |csv|
|
111
|
+
size = full_table[0].length / 2
|
112
|
+
keys = full_table[0].keys.first(size)
|
113
|
+
#first row contains column names
|
114
|
+
csv << keys
|
115
|
+
full_table.each do |row|
|
116
|
+
csv << row.values_at(*keys)
|
117
|
+
# psql_pbar.inc
|
118
|
+
end
|
46
119
|
end
|
120
|
+
end
|
121
|
+
end
|
122
|
+
else
|
123
|
+
info.keys.each do |environment|
|
47
124
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
125
|
+
if(environment == 'default') #in Rails 4.1+ environments inherit from default, which does not have database so we will not include it
|
126
|
+
next
|
127
|
+
end
|
128
|
+
homedir = "lib/ferry_to_csv_#{environment}"
|
52
129
|
|
53
|
-
|
54
|
-
|
130
|
+
ActiveRecord::Base.establish_connection(
|
131
|
+
adapter: 'postgresql',
|
132
|
+
host: info[environment]['host'] || 'localhost',
|
133
|
+
username: info[environment]['username'],
|
134
|
+
password: info[environment]['password'],
|
135
|
+
database: info[environment]['database'],
|
136
|
+
encoding: info[environment]['encoding']
|
137
|
+
)
|
138
|
+
puts "connected to #{environment} env db"
|
139
|
+
FileUtils.mkdir homedir unless Dir[homedir].present?
|
140
|
+
puts "exporting tables to #{homedir}"
|
141
|
+
# psql_pbar = ProgressBar.new("psql_to_csv", 100)
|
142
|
+
ActiveRecord::Base.connection.tables.each do |model|
|
143
|
+
full_table = ActiveRecord::Base.connection.execute("SELECT * FROM #{model};")
|
144
|
+
# do not create a csv for an empty table
|
145
|
+
if full_table.num_tuples > 0
|
146
|
+
CSV.open("#{homedir}/#{model}.csv", "w") do |csv|
|
147
|
+
size = full_table[0].length / 2
|
148
|
+
keys = full_table[0].keys.first(size)
|
149
|
+
#first row contains column names
|
150
|
+
csv << keys
|
151
|
+
full_table.each do |row|
|
152
|
+
csv << row.values_at(*keys)
|
153
|
+
# psql_pbar.inc
|
154
|
+
end
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
158
|
+
end
|
159
|
+
end
|
160
|
+
when "mysql2"
|
161
|
+
puts "operating with mysql2"
|
55
162
|
|
56
|
-
|
57
|
-
|
58
|
-
end
|
163
|
+
if(which_db_env)
|
164
|
+
homedir = "lib/ferry_to_csv_#{which_db_env}"
|
59
165
|
|
60
|
-
|
166
|
+
ActiveRecord::Base.establish_connection( #this may not work on default rails if production db is not created (must run rake db:create:all)
|
167
|
+
adapter: 'mysql2',
|
168
|
+
host: info[which_db_env]['host'] || 'localhost',
|
169
|
+
username: info[which_db_env]['username'],
|
170
|
+
password: info[which_db_env]['password'],
|
171
|
+
database: info[which_db_env]['database']
|
172
|
+
)
|
173
|
+
puts "connected to #{which_db_env} env db"
|
174
|
+
FileUtils.mkdir homedir unless Dir[homedir].present?
|
175
|
+
puts "exporting tables to #{homedir}"
|
176
|
+
# psql_pbar = ProgressBar.new("psql_to_csv", 100)
|
61
177
|
|
62
|
-
|
63
|
-
|
178
|
+
ActiveRecord::Base.connection.tables.each do |model| #for each model in the db
|
179
|
+
columns = ActiveRecord::Base.connection.execute("SELECT `COLUMN_NAME` FROM `INFORMATION_SCHEMA`.`COLUMNS` WHERE `TABLE_SCHEMA`= '#{info[which_db_env]['database']}' AND `TABLE_NAME`='#{model}';")
|
180
|
+
CSV.open("#{homedir}/#{model}.csv", "w") do |csv|
|
64
181
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
end
|
71
|
-
end
|
182
|
+
col_names=[]
|
183
|
+
columns.each do |col|
|
184
|
+
col_names.append(col[0]) #append the column names to an array, makes for good formatting
|
185
|
+
end
|
186
|
+
csv << col_names #first csv row is of column names
|
72
187
|
|
188
|
+
full_table = ActiveRecord::Base.connection.execute("SELECT * FROM #{model};")
|
189
|
+
full_table.each do |row|
|
190
|
+
csv << row #not sure if this will hold, but no 'values_at' method exists
|
191
|
+
# psql_pbar.inc
|
192
|
+
end
|
193
|
+
end
|
73
194
|
end
|
195
|
+
else
|
196
|
+
info.keys.each do |environment|
|
74
197
|
|
198
|
+
if(environment == 'default') #in Rails 4.1+ environments inherit from default, which does not have database so we will not include it
|
199
|
+
next
|
200
|
+
end
|
75
201
|
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
202
|
+
homedir = "lib/ferry_to_csv_#{environment}"
|
203
|
+
|
204
|
+
ActiveRecord::Base.establish_connection( #this may not work on default rails if production db is not created (must run rake db:create:all)
|
205
|
+
adapter: 'mysql2',
|
206
|
+
host: info[environment]['host'] || 'localhost',
|
207
|
+
username: info[environment]['username'],
|
208
|
+
password: info[environment]['password'],
|
209
|
+
database: info[environment]['database']
|
210
|
+
)
|
211
|
+
puts "connected to #{environment} env db"
|
212
|
+
FileUtils.mkdir homedir unless Dir[homedir].present?
|
213
|
+
puts "exporting tables to #{homedir}"
|
214
|
+
# psql_pbar = ProgressBar.new("psql_to_csv", 100)
|
215
|
+
|
216
|
+
ActiveRecord::Base.connection.tables.each do |model| #for each model in the db
|
217
|
+
columns = ActiveRecord::Base.connection.execute("SELECT `COLUMN_NAME` FROM `INFORMATION_SCHEMA`.`COLUMNS` WHERE `TABLE_SCHEMA`= '#{info[environment]['database']}' AND `TABLE_NAME`='#{model}';")
|
218
|
+
CSV.open("#{homedir}/#{model}.csv", "w") do |csv|
|
219
|
+
|
220
|
+
col_names=[]
|
221
|
+
columns.each do |col|
|
222
|
+
col_names.append(col[0]) #append the column names to an array, makes for good formatting
|
223
|
+
end
|
224
|
+
csv << col_names #first csv row is of column names
|
225
|
+
|
226
|
+
full_table = ActiveRecord::Base.connection.execute("SELECT * FROM #{model};")
|
227
|
+
full_table.each do |row|
|
228
|
+
csv << row #not sure if this will hold, but no 'values_at' method exists
|
229
|
+
# psql_pbar.inc
|
230
|
+
end
|
231
|
+
end
|
232
|
+
end
|
233
|
+
end
|
234
|
+
end
|
235
|
+
when "mongo"
|
236
|
+
puts "mongo is currently not supported"
|
237
|
+
else
|
238
|
+
puts "Unknown db type or no database associated with this application."
|
82
239
|
end
|
83
|
-
|
84
|
-
|
85
|
-
|
240
|
+
end
|
241
|
+
|
242
|
+
def to_new_db_type
|
243
|
+
info = YAML::load(IO.read("config/database.yml"))
|
244
|
+
current_db_type = info[which_db_env]["adapter"]
|
245
|
+
puts "switching the #{which_db_env} database's adapter"
|
246
|
+
puts "current_db_type: #{current_db_type}"
|
247
|
+
puts "to_new_db_type: #{switch_to_db_type}"
|
86
248
|
|
249
|
+
# check for dependencies
|
250
|
+
# if dependencies exist - install them
|
251
|
+
# create new connection
|
252
|
+
# transfer old db into new connection
|
253
|
+
# drop old connection
|
254
|
+
# update the config file
|
255
|
+
# profit
|
87
256
|
end
|
88
|
-
end
|
89
257
|
|
90
|
-
|
91
|
-
# def migrate(options, &block)
|
92
|
-
# options[:max_workers] ||= 4
|
93
|
-
# options[:batch_size] ||= 10_000
|
94
|
-
|
95
|
-
# log = Logger.new()
|
96
|
-
|
97
|
-
# active_workers = []
|
98
|
-
# collection = self
|
99
|
-
# collection.find_in_batches(batch_size: options[:batch_size]) do |batch|
|
100
|
-
# if active_workers.length >= options[:max_workers]
|
101
|
-
# log.write "active_workers oversized at capacity of #{active_workers.length}/#{options[:max_workers]}"
|
102
|
-
# finished_process = Process.wait
|
103
|
-
# log.write "finished_process: #{finished_process}"
|
104
|
-
# active_workers.delete finished_process
|
105
|
-
# log.write "active_workers capacity now at: #{active_workers.length}/#{options[:max_workers]}"
|
106
|
-
# else
|
107
|
-
# active_workers << fork do
|
108
|
-
# ActiveRecord::Base.connection.reconnect!
|
109
|
-
# log.write "kicking off engine on batch(#{batch.first}-#{batch.last})"
|
110
|
-
# engine = Engine.new()
|
111
|
-
# engine.run({log: log, batch: batch}, &block)
|
112
|
-
# end
|
113
|
-
# end
|
114
|
-
# ActiveRecord::Base.connection.reconnect!
|
115
|
-
# end
|
116
|
-
# end
|
117
|
-
# end
|
258
|
+
end
|
118
259
|
end
|
data/lib/ferry/version.rb
CHANGED
data/spec/spec_helper.rb
CHANGED
@@ -1,89 +1,11 @@
|
|
1
|
-
# This file was generated by the `rspec --init` command. Conventionally, all
|
2
|
-
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
|
3
|
-
# The generated `.rspec` file contains `--require spec_helper` which will cause this
|
4
|
-
# file to always be loaded, without a need to explicitly require it in any files.
|
5
|
-
#
|
6
|
-
# Given that it is always loaded, you are encouraged to keep this file as
|
7
|
-
# light-weight as possible. Requiring heavyweight dependencies from this file
|
8
|
-
# will add to the boot time of your test suite on EVERY test run, even for an
|
9
|
-
# individual file that may not need all of that loaded. Instead, make a
|
10
|
-
# separate helper file that requires this one and then use it only in the specs
|
11
|
-
# that actually need it.
|
12
|
-
#
|
13
|
-
# The `.rspec` file also contains a few flags that are not defaults but that
|
14
|
-
# users commonly want.
|
15
|
-
#
|
16
|
-
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
|
17
1
|
require 'ferry'
|
18
2
|
|
19
|
-
ActiveRecord::Base.establish_connection(:adapter => "sqlite3",
|
20
|
-
|
3
|
+
ActiveRecord::Base.establish_connection(:adapter => "sqlite3",
|
4
|
+
:database => File.dirname(__FILE__) + "/ferry.sqlite3")
|
21
5
|
|
22
6
|
load File.dirname(__FILE__) + '/support/schema.rb'
|
23
7
|
load File.dirname(__FILE__) + '/support/models.rb'
|
24
8
|
load File.dirname(__FILE__) + '/support/data.rb'
|
25
9
|
|
26
|
-
|
27
|
-
|
28
10
|
RSpec.configure do |config|
|
29
|
-
# The settings below are suggested to provide a good initial experience
|
30
|
-
# with RSpec, but feel free to customize to your heart's content.
|
31
|
-
=begin
|
32
|
-
# These two settings work together to allow you to limit a spec run
|
33
|
-
# to individual examples or groups you care about by tagging them with
|
34
|
-
# `:focus` metadata. When nothing is tagged with `:focus`, all examples
|
35
|
-
# get run.
|
36
|
-
config.filter_run :focus
|
37
|
-
config.run_all_when_everything_filtered = true
|
38
|
-
|
39
|
-
# Many RSpec users commonly either run the entire suite or an individual
|
40
|
-
# file, and it's useful to allow more verbose output when running an
|
41
|
-
# individual spec file.
|
42
|
-
if config.files_to_run.one?
|
43
|
-
# Use the documentation formatter for detailed output,
|
44
|
-
# unless a formatter has already been configured
|
45
|
-
# (e.g. via a command-line flag).
|
46
|
-
config.default_formatter = 'doc'
|
47
|
-
end
|
48
|
-
|
49
|
-
# Print the 10 slowest examples and example groups at the
|
50
|
-
# end of the spec run, to help surface which specs are running
|
51
|
-
# particularly slow.
|
52
|
-
config.profile_examples = 10
|
53
|
-
|
54
|
-
# Run specs in random order to surface order dependencies. If you find an
|
55
|
-
# order dependency and want to debug it, you can fix the order by providing
|
56
|
-
# the seed, which is printed after each run.
|
57
|
-
# --seed 1234
|
58
|
-
config.order = :random
|
59
|
-
|
60
|
-
# Seed global randomization in this process using the `--seed` CLI option.
|
61
|
-
# Setting this allows you to use `--seed` to deterministically reproduce
|
62
|
-
# test failures related to randomization by passing the same `--seed` value
|
63
|
-
# as the one that triggered the failure.
|
64
|
-
Kernel.srand config.seed
|
65
|
-
|
66
|
-
# rspec-expectations config goes here. You can use an alternate
|
67
|
-
# assertion/expectation library such as wrong or the stdlib/minitest
|
68
|
-
# assertions if you prefer.
|
69
|
-
config.expect_with :rspec do |expectations|
|
70
|
-
# Enable only the newer, non-monkey-patching expect syntax.
|
71
|
-
# For more details, see:
|
72
|
-
# - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
|
73
|
-
expectations.syntax = :expect
|
74
|
-
end
|
75
|
-
|
76
|
-
# rspec-mocks config goes here. You can use an alternate test double
|
77
|
-
# library (such as bogus or mocha) by changing the `mock_with` option here.
|
78
|
-
config.mock_with :rspec do |mocks|
|
79
|
-
# Enable only the newer, non-monkey-patching expect syntax.
|
80
|
-
# For more details, see:
|
81
|
-
# - http://teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
|
82
|
-
mocks.syntax = :expect
|
83
|
-
|
84
|
-
# Prevents you from mocking or stubbing a method that does not exist on
|
85
|
-
# a real object. This is generally recommended.
|
86
|
-
mocks.verify_partial_doubles = true
|
87
|
-
end
|
88
|
-
=end
|
89
11
|
end
|
data/spec/support/data.rb
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
Design.create(
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
2
|
+
:design_id => 1,
|
3
|
+
:product_id => 1,
|
4
|
+
:account_id => 1,
|
5
|
+
:account_file => "Reunion 2014",
|
6
|
+
:save_method => "WWW",
|
7
|
+
:total_units => 25,
|
8
|
+
:has_upload => true,
|
9
|
+
:created_at => DateTime.now,
|
10
|
+
:updated_at => DateTime.now,
|
11
|
+
:postal_code => 96822)
|
data/spec/support/models.rb
CHANGED
data/spec/support/schema.rb
CHANGED
@@ -1,18 +1,17 @@
|
|
1
1
|
ActiveRecord::Schema.define do
|
2
2
|
self.verbose = false
|
3
|
+
create_table :designs do |t|
|
4
|
+
t.integer :design_id
|
5
|
+
t.integer :product_id
|
6
|
+
t.integer :account_id
|
7
|
+
t.string :account_file
|
8
|
+
t.string :save_method
|
9
|
+
t.integer :total_units
|
10
|
+
t.boolean :has_upload
|
11
|
+
t.date :created_at
|
12
|
+
t.date :updated_at
|
13
|
+
t.integer :postal_code
|
3
14
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
t.integer :account_id
|
8
|
-
t.string :account_file
|
9
|
-
t.string :save_method
|
10
|
-
t.integer :total_units
|
11
|
-
t.boolean :has_upload
|
12
|
-
t.date :created_at
|
13
|
-
t.date :updated_at
|
14
|
-
t.integer :postal_code
|
15
|
-
|
16
|
-
t.timestamps
|
17
|
-
end
|
18
|
-
end
|
15
|
+
t.timestamps
|
16
|
+
end
|
17
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ferry
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.6
|
4
|
+
version: 0.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Anthony Corletti
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2014-09-
|
13
|
+
date: 2014-09-30 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: activerecord
|
@@ -40,20 +40,6 @@ dependencies:
|
|
40
40
|
- - "~>"
|
41
41
|
- !ruby/object:Gem::Version
|
42
42
|
version: '1.6'
|
43
|
-
- !ruby/object:Gem::Dependency
|
44
|
-
name: progressbar
|
45
|
-
requirement: !ruby/object:Gem::Requirement
|
46
|
-
requirements:
|
47
|
-
- - ">="
|
48
|
-
- !ruby/object:Gem::Version
|
49
|
-
version: '0'
|
50
|
-
type: :development
|
51
|
-
prerelease: false
|
52
|
-
version_requirements: !ruby/object:Gem::Requirement
|
53
|
-
requirements:
|
54
|
-
- - ">="
|
55
|
-
- !ruby/object:Gem::Version
|
56
|
-
version: '0'
|
57
43
|
- !ruby/object:Gem::Dependency
|
58
44
|
name: rake
|
59
45
|
requirement: !ruby/object:Gem::Requirement
|
@@ -142,15 +128,11 @@ files:
|
|
142
128
|
- README.md
|
143
129
|
- Rakefile
|
144
130
|
- bin/ferry
|
145
|
-
- dbfile.sqlite3
|
146
131
|
- ferry.gemspec
|
147
132
|
- lib/ferry.rb
|
148
|
-
- lib/ferry/engine.rb
|
149
|
-
- lib/ferry/logger.rb
|
150
133
|
- lib/ferry/version.rb
|
151
|
-
- lib/tasks/dump_task.rake
|
152
134
|
- spec/ferry.sqlite3
|
153
|
-
- spec/ferry_spec.rb
|
135
|
+
- spec/lib/ferry_spec.rb
|
154
136
|
- spec/spec_helper.rb
|
155
137
|
- spec/support/data.rb
|
156
138
|
- spec/support/models.rb
|
@@ -181,7 +163,7 @@ specification_version: 4
|
|
181
163
|
summary: Ferry is a data migration and data manipulation tool
|
182
164
|
test_files:
|
183
165
|
- spec/ferry.sqlite3
|
184
|
-
- spec/ferry_spec.rb
|
166
|
+
- spec/lib/ferry_spec.rb
|
185
167
|
- spec/spec_helper.rb
|
186
168
|
- spec/support/data.rb
|
187
169
|
- spec/support/models.rb
|
data/dbfile.sqlite3
DELETED
File without changes
|
data/lib/ferry/engine.rb
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
class Engine
|
2
|
-
def initialize(options={})
|
3
|
-
end
|
4
|
-
|
5
|
-
def run(options, &block)
|
6
|
-
log = options[:log]
|
7
|
-
collection = options[:batch]
|
8
|
-
log.write "collection length: #{collection.length}"
|
9
|
-
begin
|
10
|
-
instance_exec(collection, &block)
|
11
|
-
rescue Exception => e
|
12
|
-
log.write "Error: #{e}"
|
13
|
-
end
|
14
|
-
log.write "worker finished"
|
15
|
-
end
|
16
|
-
end
|
data/lib/ferry/logger.rb
DELETED
@@ -1,12 +0,0 @@
|
|
1
|
-
class Logger
|
2
|
-
# def initialize(options={})
|
3
|
-
# @homedir = options[:homedir] ||= "log"
|
4
|
-
# FileUtils.mkdir @homedir unless Dir[@homedir].present?
|
5
|
-
# FileUtils.touch "#{@homedir}/ferry.log"
|
6
|
-
# end
|
7
|
-
|
8
|
-
# def write(msg)
|
9
|
-
# log = File.open("#{@homedir}/ferry.log", 'w')
|
10
|
-
# log.puts msg
|
11
|
-
# end
|
12
|
-
end
|
data/lib/tasks/dump_task.rake
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
namespace :db do
|
2
|
-
desc "Dump schema and data to db/schema.rb and db/data.yml"
|
3
|
-
task(:dump => [ "db:schema:dump", "db:data:dump" ])
|
4
|
-
|
5
|
-
namespace :data do
|
6
|
-
def db_dump_data_file (extension = "yml")
|
7
|
-
"#{dump_dir}/data.#{extension}"
|
8
|
-
end
|
9
|
-
|
10
|
-
desc "Dump contents of database to db/data.extension (defaults to yaml)"
|
11
|
-
task :dump => :environment do
|
12
|
-
# format_class = ENV['class'] || "YamlDb::Helper"
|
13
|
-
# helper = format_class.constantize
|
14
|
-
# SerializationHelper::Base.new(helper).dump db_dump_data_file helper.extension
|
15
|
-
puts "yolo"
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|