data_task 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,28 @@
1
+ require_relative '../../db'
2
+
3
module Rake
  module DataTask

    class Db
      # Transaction helpers for adapters whose database accepts plain
      # "begin;", "commit;" and "rollback;" statements.
      #
      # The including class must provide an +execute(sql)+ method.
      module StandardTransactions

        # Run the given block inside a database transaction.
        #
        # If the block raises (or exits non-locally), the transaction is rolled
        # back and the exception propagates, so the connection is never left
        # inside an open transaction and partial work is never committed.
        #
        # @param do_commit [Boolean] commit on normal completion when truthy, otherwise roll back
        # @yield the work to perform between "begin;" and the closing statement
        # @return [Object] whatever +execute+ returns for the closing statement
        def with_transaction(do_commit, &block)
          execute "begin;"
          finished = false
          begin
            yield
            finished = true
          ensure
            # Abnormal exit from the block: close the transaction without committing.
            execute "rollback;" unless finished
          end
          execute(do_commit ? "commit;" : "rollback;")
        end

        # Run the block in a transaction that is committed on normal completion.
        def with_transaction_commit(&block)
          with_transaction true, &block
        end

        # Run the block in a transaction that is always rolled back.
        def with_transaction_rollback(&block)
          with_transaction false, &block
        end

      end
    end

  end
end
@@ -0,0 +1,34 @@
1
+ require_relative './db'
2
+
3
module Rake
  module DataTask

    # Represents a table accessed via a database, roughly analogous to the File class.
    # Existence and modification-time questions are delegated to the adapter.
    class Data

      attr_accessor :name
      attr_accessor :adapter

      # @param data_name [#to_s] name of the relation this object stands for
      # @param adapter [Object] object answering +data_exists?+ and +data_mtime+
      def initialize(data_name, adapter)
        @name = data_name
        @adapter = adapter
      end

      # Ask the adapter whether the named data exists.
      #
      # @param options [Hash] passed through to the adapter unchanged
      # @return [Object] the adapter's answer
      def exists?(options = {})
        @adapter.data_exists?(@name, options)
      end

      alias_method :exist?, :exists?

      # @return [Object] the modification time reported by the adapter for this data
      def mtime
        @adapter.data_mtime(@name)
      end

      # @return [String] the data's name; always a String, even when the
      #   object was built with a Symbol name
      def to_s
        @name.to_s
      end

    end

  end
end
@@ -0,0 +1,57 @@
1
+ require 'yaml'
2
+ require 'logger'
3
+ require_relative './sql'
4
+ require_relative './util'
5
+
6
module Rake
  module DataTask

    # Base class for SQL-compliant relational databases. It holds utility methods
    # that probably don't vary across databases; it is not meant to be instantiated directly.
    class Db

      LOG = Logger.new(STDOUT)
      LOG.level = Logger::WARN

      # Name of the table in which tracked operations are recorded.
      TABLE_TRACKER_NAME = 'last_operations'

      # Case-sensitive, DBMS-specific values stored in the tracking table, keyed by column.
      # Child classes for specific databases can supply their own values.
      TABLE_TRACKER_COLUMNS = {
        relation_name: { data_type: :text },
        relation_type: {
          data_type: :text,
          values: {
            table: 'table',
            view: 'view'
          }
        },
        operation: {
          data_type: :text,
          values: {
            create: 'create',
            insert: 'insert',
            update: 'update',
            truncate: 'truncate',
            delete: 'delete'
          }
        },
        time: { data_type: :timestamp }
      }

      # @return [Hash] the tracking-table column definitions
      def table_tracker_columns
        TABLE_TRACKER_COLUMNS
      end

      # @return [Hash] operation symbol => value stored in the tracking table
      def operation_values
        operation_column = table_tracker_columns[:operation]
        operation_column[:values]
      end

      # @return [Hash] relation-type symbol => value stored in the tracking table
      def relation_type_values
        relation_type_column = table_tracker_columns[:relation_type]
        relation_type_column[:values]
      end

    end

  end
end
@@ -0,0 +1,49 @@
1
+ require 'time'
2
+
3
module Rake
  module DataTask

    # Helpers that turn a raw query result (an array of row arrays) into a single Ruby value.
    class Sql

      # Check that a query result holds at most one value, then hand it to the supplied block.
      #
      # @param r [Array] an array (table) of arrays (rows), usually resulting from a database query
      # @param type_logic [Proc] block that converts the result; it is yielded +r+ itself
      # @raise [TypeError] if r contains more than one row or column
      # @return [Object, nil] the block's return value, or nil when r is nil, empty,
      #   +[[]]+ or +[[nil]]+
      def self.parse_single_value(r, &type_logic)
        return nil if r.nil? || r.empty? || r == [[]] || r == [[nil]]
        raise TypeError, 'Query must result in a single row' if r.length > 1
        raise TypeError, 'Query must result in a single column' if r.first.length > 1

        yield(r)
      end

      # Get a single integer via SQL.
      #
      # @param r [Array] an array containing a single result from a query
      # @return [Integer, nil] the single result converted to an Integer, or nil if there is none
      def self.get_single_int(r)
        parse_single_value(r) { Integer(r.first.first) }
      end

      # Get a single time via SQL.
      #
      # The value is parsed with Time.parse and converted directly to a DateTime,
      # which keeps the UTC offset and any fractional seconds.
      #
      # @param r [Array] an array containing a single result from a query
      # @return [DateTime, nil] the single result as a DateTime, or nil if there is none
      def self.get_single_time(r)
        parse_single_value(r) { Time.parse(r.first.first).to_datetime }
      end

    end

  end
end
@@ -0,0 +1,35 @@
1
+ require 'data_task/adapters/postgres'
2
+ require 'data_task/adapters/sqlite'
3
+
4
# set up adapters to two databases
postgres = Rake::DataTask::Postgres.new(
  'host' => 'localhost', 'port' => 5432, 'database' => 'example', 'username' => 'postgres')
sqlite = Rake::DataTask::Sqlite.new('database' => 'example')


desc "Build a data file."
file 'raw.txt' do
  File.open('raw.txt', 'w') { |file| file.write("v1") }
end

desc "Load a data file into PostgreSQL for analysis."
data postgres['raw'] => 'raw.txt' do
  postgres.create_table "raw", nil, "(var1 text)"
  postgres.execute "copy raw from '#{File.expand_path('raw.txt',Dir.pwd)}'"
end

desc "Perform analysis in PostgreSQL."
data postgres['analyzed'] => postgres['raw'] do
  # perform analysis ...
  postgres.create_table "analyzed", "select * from raw", nil
end

desc "Archive analysis results in SQLite."
data sqlite['analyzed_archive'] => postgres['analyzed'] do
  sqlite.create_table "analyzed_archive", nil, "(var1 text)"
  values = postgres.execute("select var1 from analyzed").flatten
  # With no rows to copy, skip the insert: joining an empty list would
  # otherwise produce a single bogus ('') row.
  unless values.empty?
    # Double any embedded single quote so each value stays one valid SQL string literal.
    rows = values.map { |value| "('#{value.to_s.gsub("'", "''")}')" }
    sqlite.execute <<-EOSQL
      insert into analyzed_archive values
      #{rows.join(",")}
    EOSQL
  end
end
@@ -0,0 +1,6 @@
1
class Array
  # Render the elements as one string in which every element is wrapped in
  # single quotes and elements are separated by commas, e.g. 'a','b'.
  # Elements are joined as-is; quotes inside an element are not escaped.
  #
  # @return [String] the quoted, comma-separated list ("''" for an empty array)
  def to_quoted_s
    "'#{join("','")}'"
  end
end
@@ -0,0 +1,5 @@
1
module Rake
  module DataTask
    # Release number of the data_task gem.
    VERSION = '0.0.2'
  end
end
data/lib/data_task.rb ADDED
@@ -0,0 +1,76 @@
1
+ require 'rake'
2
+ require 'data_task/version'
3
+ require 'data_task/data'
4
+ require 'data_task/adapters/postgres'
5
+ require 'data_task/adapters/sqlite'
6
+
7
module Rake
  # #########################################################################
  # A DataTask is a task that includes time based dependencies. If any of a
  # DataTask's prerequisites has a timestamp that is later than the data
  # represented by this task, then the data must be rebuilt (using the
  # supplied actions).
  module DataTask

    class DataTask < ::Rake::Task

      # Instantiate a new DataTask.
      #
      # @param [Data] data the Data object that keeps track of existence and modification
      # @param [Rake::Application] app required by the parent class's constructor
      def initialize(data, app)
        super
        @data = data
      end

      # Is this data task needed? Yes if the data doesn't exist, or if its time stamp
      # is out of date.
      def needed?
        !@data.exist? || out_of_date?(timestamp)
      end

      # Time stamp for data task.
      #
      # @return [Time, Rake::EarlyTime] the data's modification time, or Rake::EARLY
      #   when the data does not exist
      # @raise [RuntimeError] if the data exists but reports no modification time
      def timestamp
        return Rake::EARLY unless @data.exist?

        mtime = @data.mtime
        # Check for nil before converting; calling to_time on nil would raise
        # NoMethodError and hide this more useful message.
        raise "Table #{name} exists but modified time is unavailable." if mtime.nil?
        mtime.to_time
      end

      private

      # Are there any prerequisites with a later time than the given time stamp?
      def out_of_date?(stamp)
        @prerequisites.any? do |n|
          prereq_time = application[n, @scope].timestamp
          # `next`, not `return`: a prerequisite without a timestamp only rules
          # out itself; the remaining prerequisites must still be compared.
          next false if prereq_time == Rake::EARLY

          prereq_time > stamp
        end
      end

      # ----------------------------------------------------------------
      # Task class methods.
      #
      class << self
        # Apply the scope to the task name according to the rules for this kind
        # of task. Data tasks ignore the scope when creating the name.
        def scope_name(scope, task_name)
          task_name
        end
      end

    end

  end
end
70
+
71
# Declare a data task. Arguments and block are handed unchanged to
# Rake::DataTask::DataTask.define_task.
#
# The task name in *args is a Data object returned by an adapter. Rake keys the task by the
# name's to_s; task recording and lookup in Rake already go through to_s in order to
# accommodate tasks named by symbols.
def data(*args, &block)
  task_class = Rake::DataTask::DataTask
  task_class.define_task(*args, &block)
end
@@ -0,0 +1,10 @@
1
+ sqlite_test:
2
+ adapter: sqlite
3
+ database: ci_test
4
+
5
+ postgres_test:
6
+ adapter: postgres
7
+ database: ci_test
8
+ host: localhost
9
+ port: 5432
10
+ username: postgres
data/test/data_spec.rb ADDED
@@ -0,0 +1,81 @@
1
+ require_relative './helper.rb'
2
+
3
module Rake
  module DataTask

    # Checks that Data#mtime is set on creation and moves forward after each kind of write.
    # Every example runs inside a transaction that is rolled back afterwards.
    describe Data do

      test_data_name = "test"

      # Report whether calling +operation+ gives +data+ a later modification time.
      def mtime_updated?(data, operation)
        mtime_before = data.mtime
        # pause so the operation happens measurably later than the first reading
        sleep(1)
        operation.call
        data.mtime > mtime_before
      end

      around do |test|
        @adapter = get_adapter
        @adapter.with_transaction_rollback { test.call }
      end

      it "has a modified time after creation" do
        @adapter.with_tracking do
          @adapter.create_data test_data_name, nil, "(var1 integer)"
          created = Data.new(test_data_name, @adapter)
          created.mtime.to_time.must_be :>, Time.new(0)
        end
      end

      it "has an updated modified time after insert" do
        @adapter.with_tracking do
          @adapter.create_data test_data_name, nil, "(var1 integer)"
          tracked = Data.new(test_data_name, @adapter)
          insert_row = -> { @adapter.execute "insert into #{test_data_name} values (1)" }
          mtime_updated?(tracked, insert_row).must_equal true
        end
      end

      it "has an updated modified time after update" do
        @adapter.with_tracking do
          @adapter.create_data test_data_name, nil, "(var1 integer, var2 integer)"
          tracked = Data.new(test_data_name, @adapter)
          @adapter.execute "insert into #{test_data_name} values (1, 1)"
          update_row = -> { @adapter.execute "update #{test_data_name} set var2 = 2 where var1 = 1" }
          mtime_updated?(tracked, update_row).must_equal true
        end
      end

      it "has an updated modified time after delete" do
        @adapter.with_tracking do
          @adapter.create_data test_data_name, nil, "(var1 integer)"
          tracked = Data.new(test_data_name, @adapter)
          @adapter.execute "insert into #{test_data_name} values (1)"
          delete_rows = -> { @adapter.execute "delete from #{test_data_name}" }
          mtime_updated?(tracked, delete_rows).must_equal true
        end
      end

      it "has an updated modified time after truncate" do
        @adapter.with_tracking do
          @adapter.create_data test_data_name, nil, "(var1 integer)"
          tracked = Data.new(test_data_name, @adapter)
          @adapter.execute "insert into #{test_data_name} values (1)"
          truncate_data = -> { @adapter.truncate_data test_data_name }
          mtime_updated?(tracked, truncate_data).must_equal true
        end
      end

    end
  end
end
data/test/db_spec.rb ADDED
@@ -0,0 +1,202 @@
1
+ require_relative './helper.rb'
2
+
3
module Rake
  module DataTask

    # Exercises the adapter returned by get_adapter: relation existence checks, creation and
    # removal of tables and views, and upkeep of the operation-tracking table.
    # Every example runs inside a transaction that is rolled back afterwards.
    describe Db do

      around do |test|
        @adapter = get_adapter
        @adapter.with_transaction_rollback { test.call }
      end

      test_table = "test_table"
      test_view = "test_view"

      it "finds a table when it exists" do
        unless @adapter.table_exists?(test_table)
          @adapter.execute <<-EOSQL
            create table #{test_table} (var1 text)
          EOSQL
        end
        @adapter.table_exists?(test_table).must_equal true
      end

      it "does not find a table when it does not exist" do
        if @adapter.table_exists?(test_table)
          @adapter.execute <<-EOSQL
            drop table #{test_table}
          EOSQL
        end
        @adapter.table_exists?(test_table).must_equal false
      end

      it "creates a table when called to" do
        @adapter.with_tracking do
          @adapter.create_table test_table, nil, '(var1 text)'
          @adapter.table_exists?(test_table).must_equal true
        end
      end

      it "drops a table when called to" do
        @adapter.with_tracking do
          @adapter.create_table test_table, nil, '(var1 text)'
          @adapter.drop_table test_table
          @adapter.table_exists?(test_table).must_equal false
        end
      end

      it "creates a view when called to" do
        @adapter.with_tracking do
          @adapter.create_table test_table, nil, '(var1 text)'
          @adapter.create_view test_view, "select * from #{test_table}"
          @adapter.view_exists?(test_view).must_equal true
        end
      end

      it "drops a view when called to" do
        @adapter.with_tracking do
          @adapter.create_table test_table, nil, '(var1 text)'
          @adapter.create_view test_view, "select * from #{test_table}"
          @adapter.drop_view test_view
          @adapter.view_exists?(test_view).must_equal false
        end
      end

      it "drops a view when the underlying table is dropped" do
        @adapter.with_tracking do
          @adapter.create_table test_table, nil, '(var1 text)'
          @adapter.create_view test_view, "select * from #{test_table}"
          @adapter.drop_table test_table
          @adapter.view_exists?(test_view).must_equal false
        end
      end

      it "updates the tracking table when it creates a table" do
        @adapter.with_tracking do
          @adapter.create_table test_table, nil, '(var1 text)'
          create_rows = @adapter.execute <<-EOSQL
            select 1 from #{Db::TABLE_TRACKER_NAME}
            where
            relation_name = '#{test_table}' and
            relation_type = '#{@adapter.relation_type_values[:table]}' and
            operation = '#{@adapter.operation_values[:create]}'
          EOSQL
          tracked_create = Sql.get_single_int(create_rows)
          tracked_create.must_equal 1
        end
      end

      it "updates the tracking table when it drops a table" do
        @adapter.with_tracking do
          @adapter.create_table test_table, nil, '(var1 text)'
          @adapter.drop_table test_table
          # no operation filter: any remaining row for the table counts as still tracked
          remaining_rows = @adapter.execute <<-EOSQL
            select 1 from #{Db::TABLE_TRACKER_NAME}
            where
            relation_name = '#{test_table}' and
            relation_type = '#{@adapter.relation_type_values[:table]}'
          EOSQL
          still_tracking_table = Sql.get_single_int(remaining_rows)
          still_tracking_table.must_be_nil
        end
      end

      it "updates the tracking table on insert to a tracked table" do
        @adapter.with_tracking do
          @adapter.create_table test_table, nil, '(var1 text)'
          @adapter.execute <<-EOSQL
            insert into #{test_table} values ('a')
          EOSQL
          insert_rows = @adapter.execute <<-EOSQL
            select 1 from #{Db::TABLE_TRACKER_NAME}
            where
            relation_name = '#{test_table}' and
            relation_type = '#{@adapter.relation_type_values[:table]}' and
            operation = '#{@adapter.operation_values[:insert]}'
          EOSQL
          tracked_insert = Sql.get_single_int(insert_rows)
          tracked_insert.must_equal 1
        end
      end

      it "updates the tracking table on update on a tracked table" do
        @adapter.with_tracking do
          @adapter.create_table test_table, nil, '(var1 text, var2 text)'
          @adapter.execute <<-EOSQL
            insert into #{test_table} values ('a', 'a')
          EOSQL
          @adapter.execute <<-EOSQL
            update #{test_table} set var2 = 'b' where var1 = 'a'
          EOSQL

          update_rows = @adapter.execute <<-EOSQL
            select 1 from #{Db::TABLE_TRACKER_NAME}
            where
            relation_name = '#{test_table}' and
            relation_type = '#{@adapter.relation_type_values[:table]}' and
            operation = '#{@adapter.operation_values[:update]}'
          EOSQL
          tracked_update = Sql.get_single_int(update_rows)
          tracked_update.must_equal 1
        end
      end

      it "updates the tracking table on truncate of a tracked table" do
        @adapter.with_tracking do
          @adapter.create_table test_table, nil, '(var1 text)'
          @adapter.truncate_table test_table
          truncate_rows = @adapter.execute <<-EOSQL
            select 1 from #{Db::TABLE_TRACKER_NAME}
            where
            relation_name = '#{test_table}' and
            relation_type = '#{@adapter.relation_type_values[:table]}' and
            operation = '#{@adapter.operation_values[:truncate]}'
          EOSQL
          tracked_truncate = Sql.get_single_int(truncate_rows)
          tracked_truncate.must_equal 1
        end
      end

      it "says it is tracking tables after tracking is set up" do
        @adapter.tear_down_tracking
        @adapter.set_up_tracking
        @adapter.tracking_tables?.must_equal true
      end

      it "says it is not tracking tables after tracking is torn down" do
        @adapter.tear_down_tracking
        @adapter.tracking_tables?.must_equal false
      end

    end

  end
end
data/test/helper.rb ADDED
@@ -0,0 +1,37 @@
1
+ # https://coveralls.io
2
+ require 'coveralls'
3
+ Coveralls.wear!
4
+
5
+ require 'minitest/autorun'
6
+ require 'minitest/around/spec'
7
+ require 'minitest-spec-context'
8
+
9
+ require 'logger'
10
+
11
+ require 'data_task/sql'
12
+ require 'data_task/db'
13
+ require 'data_task/data'
14
+ require 'data_task'
15
+
16
+ require 'data_task/adapters/sqlite'
17
+ require 'data_task/adapters/postgres'
18
+
19
# Build an adapter connected to the database configured for testing.
#
# The configuration entry is chosen by the DATATASK_ENV environment variable
# ('sqlite_test' when unset) from test/config/database.yml; its 'adapter' value,
# capitalized, names the adapter class under Rake::DataTask.
#
# @return [Object] the adapter, extended with TrackingSetupTeardownHelper
def get_adapter
  env_name = ENV['DATATASK_ENV'] || 'sqlite_test'
  config = YAML.load_file('test/config/database.yml')[env_name]
  adapter_class = Rake::DataTask.const_get(config['adapter'].capitalize)

  # extend the adapter to enable clean tracking setup/teardown within each test
  adapter_class.new(config).extend(TrackingSetupTeardownHelper)
end
30
+
31
# Mixed into an adapter so a test can run with tracking set up for its duration.
# The extended object must provide +set_up_tracking+ and +tear_down_tracking+.
module TrackingSetupTeardownHelper
  # Set up tracking, run the block, then tear tracking down again.
  #
  # Teardown runs even when the block raises (for example on a failed
  # assertion), so tracking set up for one test is not left in place.
  #
  # @param ops [Proc] the operations to run while tracking is set up
  # @return [Object] the return value of +tear_down_tracking+
  def with_tracking(&ops)
    set_up_tracking
    teardown_result = nil
    begin
      ops.call
    ensure
      teardown_result = tear_down_tracking
    end
    teardown_result
  end
end