forklift_etl 1.1.9 → 1.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- forklift_etl (1.1.9)
4
+ forklift_etl (1.1.10)
5
5
  activesupport (~> 4.0, >= 4.0.0)
6
6
  elasticsearch (~> 1.0, >= 1.0.0)
7
7
  lumberjack (~> 1.0, >= 1.0.0)
@@ -107,21 +107,27 @@ module Forklift
107
107
  # The high water method will stub a row in all tables with a `default_matcher` column pretending to have a record from `time`
108
108
  # This enables partial forklift runs which will only extract data "later than X"
109
109
  # TODO: assumes all columns have a default NULL setting
110
- def self.write_high_water_mark(db, time, matcher=source.default_matcher)
110
+ def self.write_high_water_mark(db, time, matcher=db.default_matcher)
111
111
  db.tables.each do |table|
112
- columns, types = db.columns(table, nil, true)
112
+ columns, types = db.columns(table, db.current_database, true)
113
113
  if columns.include?(matcher)
114
114
  row = {}
115
115
  i = 0
116
116
  while( i < columns.length )
117
117
  if(columns[i] == matcher)
118
- row[column[i]] << time.to_s(:db)
119
- elsif( types[i] =~ /text/ || types[i] =~ /varchar/ )
120
- row[column[i]] << "~~stub~~"
118
+ row[columns[i]] = time.to_s(:db)
119
+ elsif( types[i] =~ /text/ )
120
+ row[columns[i]] = "~~stub~~"
121
+ elsif( types[i] =~ /varchar/ )
122
+ row[columns[i]] = "~~stub~~".to_sym
121
123
  elsif( types[i] =~ /float/ || types[i] =~ /int/ )
122
- row[column[i]] << 0
124
+ row[columns[i]] = 0
125
+ elsif( types[i] =~ /datetime/ || types[i] =~ /timetsamp/ )
126
+ row[columns[i]] = time.to_s(:db)
127
+ elsif( types[i] =~ /date/ )
128
+ row[columns[i]] = time.to_s(:db).split(" ").first
123
129
  else
124
- row[column[i]] << "NULL"
130
+ row[columns[i]] = "NULL"
125
131
  end
126
132
  i = i + 1
127
133
  end
@@ -138,6 +138,7 @@ module Forklift
138
138
  return {
139
139
  project_root: Dir.pwd,
140
140
  batch_size: 1000,
141
+ char_bytecode_max: 65535, # the utf8 char limit
141
142
  logger: {
142
143
  stdout: true,
143
144
  debug: false,
@@ -99,7 +99,22 @@ module Forklift
99
99
  q(delete_q)
100
100
  end
101
101
  insert_q = insert_q[0...-1]
102
- q(insert_q)
102
+
103
+ begin
104
+ q(insert_q)
105
+ rescue Mysql2::Error => ex
106
+ # UTF8 Safety. Open a PR if you don't want UTF8 data...
107
+ # https://github.com/taskrabbit/demoji
108
+ raise ex unless ex.message.match /Incorrect string value:/
109
+ safer_insert_q = ""
110
+ for i in (0...insert_q.length)
111
+ char = insert_q[i]
112
+ char = '???' if char.ord > forklift.config[:char_bytecode_max]
113
+ safer_insert_q << char
114
+ end
115
+ q(safer_insert_q)
116
+ end
117
+
103
118
  forklift.logger.log "wrote #{data.length} rows to `#{database}`.`#{table}`"
104
119
  end
105
120
  end
@@ -1,3 +1,3 @@
1
1
  module Forklift
2
- VERSION = "1.1.9"
2
+ VERSION = "1.1.10"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: forklift_etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.9
4
+ version: 1.1.10
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: