trials 0.1.3 → 0.1.4

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 303d61afdd0937269e514488c1462361d6bf24a7ade281e65d3aae0270e9ae31
-  data.tar.gz: e0e8e292b51e7166af634fdf2e6117ded4aa700567aa197280dfc35f8de96f27
+  metadata.gz: 8f8af3df6ab16bb8ba4b08d0ea7b1dd11507aeb2c6113a0598a5e9965d0264c9
+  data.tar.gz: c1e9cc7305f5cee913a7091d321811153d33e9e67d383bad0f1518034a195556
 SHA512:
-  metadata.gz: 83503c1320739905b44c54e3bf31b469435562a508cbae1670749e0eeba6cc439a7c3f53b4a15389eeebbf8c3218473c778befbb05a2b75499b7904a0d07ddad
-  data.tar.gz: ae364df5de6484dc9c461838b809d24644202e6979a65c90e206a95e60dabe755a08d695f1a7aa042fb490c52193dbfbf928404f83f75a6bf70499cf93390a54
+  metadata.gz: d4bdc989741b4f86a3b80f91979a3882c2666da4749ee7864f0b45596311cbdebd297e17950d27bb84d39efc6d30b16fa18fc1a1bd4f65b98cd213ec4e0cbba7
+  data.tar.gz: d3fa929a7157792cd5fce04b64f0d0312ce240f59de755d323dc1ee910a21f9a966e74342a5e4c0d05d732291ab28a7b7a324bad6a5e1afb3c0293ea0c682c1c
data/bin/trial CHANGED
@@ -1,12 +1,13 @@
 #!/usr/bin/env ruby
 
-require_relative '../lib/trials'
-
 # env
 START_TIME = Time.now
 SCRIPT_NAME = ARGV[0].gsub('.rb', '')
 RUN = "#{START_TIME.strftime("%Y%m%dT%H%M%S")}_#{SCRIPT_NAME}"
 ROOT = Dir.pwd
+
+require_relative '../lib/trials'
+
 SECRETS = begin
   if File.exists?(File.expand_path('~/.trials/secrets.yml'))
     JSON.parse(
@@ -22,6 +23,10 @@ end
 FileUtils.mkdir_p("seeds")
 FileUtils.mkdir_p("results/#{RUN}")
 FileUtils.mkdir_p("tmp")
+FileUtils.mkdir_p("cache")
+
+# allow to init the current folder
+return if SCRIPT_NAME == 'init'
 
 # log start
 log "starting #{SCRIPT_NAME} at #{START_TIME.iso8601}"
data/lib/trials.rb CHANGED
@@ -10,18 +10,18 @@ require 'nokogiri'
 require 'ostruct'
 require 'pp'
 require 'pry'
+require 'rack'
 require 'rest-client'
+require 'securerandom'
 require 'set'
 require 'smalltext'
 require 'sqlite3'
 require 'street_address'
 require 'tty-table'
 require 'yaml'
-require 'rack'
 
 require_relative 'trials/utils/various'
 require_relative 'trials/utils/csvs'
-require_relative 'trials/utils/hashes'
 require_relative 'trials/utils/logging'
 require_relative 'trials/utils/strings'
 require_relative 'trials/utils/files'
@@ -34,7 +34,5 @@ require_relative 'trials/utils/rollbar'
 require_relative 'trials/utils/aws'
 require_relative 'trials/utils/google_drive'
 require_relative 'trials/utils/h_array'
-require_relative 'trials/data_handling/addresses'
-require_relative 'trials/data_handling/names'
 require_relative 'trials/extensions/hash'
 require_relative 'trials/extensions/array'
data/lib/trials/utils/aws.rb CHANGED
@@ -6,7 +6,7 @@ def ddb_connection
   )
 end
 
-def cached_ddb_scan(query)
+def ddb_scan_with_cache(query)
   json_cache(query.dig(:table_name)) { ddb_scan(query) }
 end
 
@@ -31,7 +31,6 @@ end
 
 def ddb_scan_without_segmentation(query)
   result = nil
-  requests = 0
   items = []
 
   loop do
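
The rename from cached_ddb_scan to ddb_scan_with_cache keeps the same behaviour: the scan result is memoised through json_cache under the table's name. A minimal usage sketch, assuming ddb_scan forwards the query hash to a DynamoDB scan; the table name is hypothetical:

    # Scans once, stores the JSON under cache/json/patients.json, and reuses it afterwards.
    items = ddb_scan_with_cache(table_name: 'patients')
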
data/lib/trials/utils/csvs.rb CHANGED
@@ -1,15 +1,15 @@
 def read_csv(filename)
   CSV
-    .foreach(seeds_path(filename), headers: true)
+    .foreach(seed_path(filename), headers: true)
     .map(&:to_h)
     .map(&:symbolize_keys)
     .select { |i| i.values.any?(&:present?) }
 end
 
 def write_csv_from_hashes(file, hash_set, attrs: nil)
-  attrs ||= uniq_hash_keys(hash_set)
+  attrs ||= hash_set.to_harray.uniq_keys
 
-  CSV.open(results_path(file), 'w') do |csv|
+  CSV.open(result_path(file), 'w') do |csv|
     csv << attrs
 
     hash_set.each do |c|
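
write_csv_from_hashes now derives its default headers from HArray#uniq_keys rather than the removed uniq_hash_keys helper. A minimal sketch, assuming Array#to_harray (from the gem's array extension) wraps the array in an HArray; the file name is hypothetical:

    rows = [{ name: 'Ada', age: 36 }, { name: 'Grace' }]
    # default headers are the union of keys across the hashes: [:name, :age]
    write_csv_from_hashes('people.csv', rows)   # written under results/<RUN>/
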
data/lib/trials/utils/files.rb CHANGED
@@ -1,110 +1,72 @@
-# paths
-
-def seeds_path(name)
-  "#{ROOT}/seeds/#{name}"
-end
-
-alias seed_path seeds_path
-
-def seed_exists?(name)
-  File.exist?(seed_path(name))
-end
-
-alias seeds_exist? seed_exists?
-
-def results_path(name)
-  "#{ROOT}/results/#{RUN}/#{name}"
-end
-
-alias result_path results_path
-
-def result_exists?(name)
-  File.exist?(result_path(name))
-end
-
-alias results_exist? result_exists?
-
-def tmp_path(name)
-  "#{ROOT}/tmp/#{name}"
-end
-
-def tmp_exists?(name)
-  File.exist?(tmp_path(name))
-end
-
-def list_dir(dir)
-  Dir["#{seeds_path(dir)}/**/*"]
-end
-
-# reading
-
-def read(file)
-  return unless seed_exists?(file)
-  File.read(seeds_path(file))
-end
-
-alias read_seed read
-alias read_seeds read
-
-def read_tmp(file)
-  return unless tmp_exists?(file)
-  File.read(tmp_path(file))
-end
-
-def readlines(file)
-  File.read(seeds_path(file)).split("\n")
-end
-
-# writing
-
-def write(file, content)
-  FileUtils.mkdir_p(File.dirname(results_path(file)))
-  File.open(results_path(file), 'w') { |f| f << content }
-end
-
-alias write_result write
-alias write_results write
-
-def write_tmp(file, content)
-  FileUtils.mkdir_p(File.dirname(tmp_path(file)))
-  File.open(tmp_path(file), 'w') { |f| f << content }
-end
-
-def append(file, content)
-  File.open(results_path(file), 'a') { |f| f << content }
-end
-
-alias append_result append
-alias append_results append
-
-# deleting
-
-def delete(file)
-  return unless result_exists?(file)
-  FileUtils.rm_r(results_path(file))
-end
-
-alias delete_result delete
-alias delete_results delete
-
-def delete_tmp(file)
-  return unless tmp_exists?(file)
-  FileUtils.rm_r(tmp_path(file))
-end
-
-def delete_seeds(file)
-  return unless seed_exists?(file)
-  FileUtils.rm_r(seeds_path(file))
-end
-
-alias delete_seed delete_seeds
-
-# other
-
-def make_seed(file)
-  FileUtils.cp(results_path(file), seeds_path(file))
-end
-
-def make_tmp(file)
-  FileUtils.cp(results_path(file), tmp_path(file))
+dirs = {
+  seed: { run: false, pluralized: ActiveSupport::Inflector.pluralize('seed') },
+  result: { run: true, pluralized: ActiveSupport::Inflector.pluralize('result') },
+  tmp: { run: false, pluralized: 'tmp' },
+  cache: { run: false, pluralized: 'cache' },
+}
+
+dirs.each do |dir, opts|
+  eval <<~RUBY
+    def #{opts.dig(:pluralized)}_root
+      File.join(
+        ROOT,
+        '#{opts.dig(:pluralized)}',
+        '#{opts.dig(:run) ? RUN : nil}',
+      ).to_s
+    end
+
+    def #{dir}_path(name)
+      File.join(
+        #{opts.dig(:pluralized)}_root,
+        name,
+      ).to_s
+    end
+
+    def #{dir}_exists?(name)
+      File.exist?(#{dir}_path(name))
+    end
+
+    def list_#{opts.dig(:pluralized)}(name = nil)
+      Dir[
+        File.join(
+          *[
+            #{opts.dig(:pluralized)}_root,
+            name,
+            '**',
+            '*',
+          ].compact
+        )
+      ].reject { |d| File.directory?(d) }
+    end
+
+    def read_#{dir}(name)
+      File.read(#{dir}_path(name)) if #{dir}_exists?(name)
+    end
+
+    def readlines_#{dir}(name)
+      read_#{dir}(name).split("\\n")
+    end
+
+    def write_#{dir}(name, content)
+      FileUtils.mkdir_p(File.dirname(#{dir}_path(name)))
+      File.open(#{dir}_path(name), 'w') { |f| f << content }
+    end
+
+    def append_to_#{dir}(name, content)
+      File.open(#{dir}_path(name), 'a') { |f| f << content }
+    end
+
+    def delete_#{dir}(name)
+      FileUtils.rm_r(#{dir}_path(name)) if #{dir}_exists?(name)
+    end
+  RUBY
+
+  dirs.each do |o_dir, o_opts|
+    next if o_dir == dir
+    eval <<~RUBY
+      def cp_#{o_dir}_to_#{opts.dig(:pluralized)}(name)
+        FileUtils.cp(#{o_dir}_path(name), #{dir}_path(name))
+      end
+    RUBY
+  end
 end
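
The hand-written seeds/results/tmp helpers are replaced by a metaprogramming loop: for each entry in dirs it evals path, exists?, list, read, readlines, write, append and delete helpers, plus cp_<other>_to_<dir> copiers between directories. As a rough illustration (not the literal generated source), the :cache entry expands to approximately:

    def cache_root
      # run: false, so no RUN segment is appended
      File.join(ROOT, 'cache', '').to_s
    end

    def cache_path(name)
      File.join(cache_root, name).to_s
    end

    def cache_exists?(name)
      File.exist?(cache_path(name))
    end

    def read_cache(name)
      File.read(cache_path(name)) if cache_exists?(name)
    end

    def write_cache(name, content)
      FileUtils.mkdir_p(File.dirname(cache_path(name)))
      File.open(cache_path(name), 'w') { |f| f << content }
    end

    # ...plus list_cache, readlines_cache, append_to_cache, delete_cache,
    # and cp_seed_to_cache / cp_result_to_cache / cp_tmp_to_cache

Only :result is run-scoped (results/<RUN>), so results stay per run while seeds, tmp and cache persist across runs.
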
data/lib/trials/utils/google_drive.rb CHANGED
@@ -1,7 +1,7 @@
 def gd_session
   @gd_session ||= begin
-    write('config.json', secrets.google.drive_config_json)
-    session = GoogleDrive::Session.from_config(results_path("config.json"))
+    write_tmp('config.json', secrets.google.drive_config_json)
+    session = GoogleDrive::Session.from_config(tmp_path("config.json"))
     delete('config.json')
     session
   end
data/lib/trials/utils/h_array.rb CHANGED
@@ -20,6 +20,14 @@ class HArray < Array
     end
   end
 
+  def normalize_keys
+    hashes.map do |h|
+      h
+        .map { |k, v| [normalize_key(k), v] }
+        .to_h
+    end
+  end
+
   def uniq_keys
     hashes.flat_map(&:keys).uniq.compact
   end
@@ -49,4 +57,10 @@ class HArray < Array
       .reduce(&:merge)
     end
   end
+
+  private
+
+  def normalize_key(k)
+    k.downcase.to_sym
+  end
 end
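
normalize_keys runs every hash in the collection through the new private normalize_key, downcasing each key and symbolizing it. A minimal sketch, assuming Array#to_harray from the gem's array extension:

    [{ 'Name' => 'Ada', 'AGE' => 36 }].to_harray.normalize_keys
    # => [{ name: 'Ada', age: 36 }]
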
data/lib/trials/utils/jsons.rb CHANGED
@@ -1,5 +1,5 @@
 def read_json(filename)
-  result = JSON.parse(read(filename))
+  result = JSON.parse(read_seed(filename))
 
   aggressive_deep_symbolize_keys(result)
 end
@@ -11,12 +11,12 @@ end
 def json_cache(key)
   name = "json/#{key}.json"
 
-  return aggressive_deep_symbolize_keys(JSON.parse(read_tmp(name))) if tmp_exists?(name)
+  return aggressive_deep_symbolize_keys(JSON.parse(read_cache(name))) if cache_exists?(name)
 
-  write_tmp(name, yield.to_json)
+  write_cache(name, yield.to_json)
   json_cache(key)
 end
 
 def invalidate_json_cache
-  delete_tmp("json")
+  delete_cache("json")
 end
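
json_cache now persists through the cache/ helpers instead of tmp/, so cached payloads survive between runs until invalidate_json_cache removes cache/json. A hypothetical usage sketch (the key and block body are illustrative):

    # First call runs the block, writes cache/json/users.json, and re-reads it;
    # later calls skip the block and return the parsed, deep-symbolized JSON.
    users = json_cache('users') { fetch_users_from_api }
    invalidate_json_cache   # force a refresh on the next call
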
data/lib/trials/utils/logging.rb CHANGED
@@ -1,7 +1,7 @@
 def render_table_from_hashes(hash_set, sort: true, headers: nil)
   return 'no data' if hash_set.blank?
 
-  headers = headers || uniq_hash_keys(hash_set)
+  headers = headers || hash_set.to_harray.uniq_keys
 
   headers.sort! if sort
 
@@ -21,7 +21,7 @@ def log(item, nl: true, quiet: false, each: true)
     return
   end
 
-  File.open(results_path('log.txt'), 'a') do |f|
+  File.open(result_path('log.txt'), 'a') do |f|
     f << begin
       if item.is_a?(String) || item.is_a?(Numeric)
         item.to_s
data/lib/trials/utils/pdfs.rb CHANGED
@@ -1,4 +1,4 @@
 def pdf_to_text(path)
-  `pdftotext "#{seeds_path(path)}" #{tmp_path('tmp_pdf.txt')}; \
+  `pdftotext "#{seed_path(path)}" #{tmp_path('tmp_pdf.txt')}; \
     cat #{tmp_path('tmp_pdf.txt')}`
 end
@@ -1,6 +1,6 @@
 def db(db_name = nil)
   db_name ||= 'data.db'
-  db = SQLite3::Database.new(tmp_path(db_name))
+  db = SQLite3::Database.new(cache_path(db_name))
   db.results_as_hash = true
   db
 end
@@ -14,7 +14,7 @@ def get_db(db_name = nil)
 end
 
 def import_csv_into_db(db_name = 'data.db', table, csv)
-  system("sqlite3 -csv #{tmp_path(db_name)} '.import #{seed_path(csv)} #{table}'")
+  system("sqlite3 -csv #{cache_path(db_name)} '.import #{seed_path(csv)} #{table}'")
 end
 
 def query_db(db_name = nil, query)
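
The SQLite helpers now keep their database files under cache/ instead of tmp/. A minimal sketch; the table and file names are hypothetical, and query_db is assumed to execute against the db helper shown above:

    import_csv_into_db('data.db', :people, 'people.csv')   # seeds/people.csv -> cache/data.db
    query_db("SELECT COUNT(*) AS n FROM people")            # db_name defaults to 'data.db'
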
data/lib/trials/utils/various.rb CHANGED
@@ -2,6 +2,14 @@ def secrets
   SECRETS
 end
 
+def run
+  RUN
+end
+
+def root_path
+  ROOT
+end
+
 def or_nil
   val = yield
   raise if val.blank?
@@ -25,3 +33,13 @@ def aggressive_deep_symbolize_keys(maybe)
 
   maybe
 end
+
+def marshal_fetch(key)
+  return Marshal.load(read_cache(key)) if cache_exists?(key)
+
+  File.open(cache_path(key), 'wb') do |f|
+    f.write(Marshal.dump(yield))
+  end
+
+  marshal_fetch(key)
+end
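
marshal_fetch mirrors json_cache but serializes arbitrary Ruby objects with Marshal into cache/, reloading them on later calls. A hypothetical usage sketch (key and block are illustrative):

    # First call dumps the block's result to cache/geocoded.bin and reloads it;
    # later calls return the unmarshalled object without running the block.
    geocoded = marshal_fetch('geocoded.bin') { expensive_geocoding_pass }
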
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: trials
 version: !ruby/object:Gem::Version
-  version: 0.1.3
+  version: 0.1.4
 platform: ruby
 authors:
 - grahamotte
@@ -299,8 +299,6 @@ extra_rdoc_files: []
 files:
 - bin/trial
 - lib/trials.rb
-- lib/trials/data_handling/addresses.rb
-- lib/trials/data_handling/names.rb
 - lib/trials/extensions/array.rb
 - lib/trials/extensions/hash.rb
 - lib/trials/utils/aws.rb
@@ -309,7 +307,6 @@ files:
 - lib/trials/utils/files.rb
 - lib/trials/utils/google_drive.rb
 - lib/trials/utils/h_array.rb
-- lib/trials/utils/hashes.rb
 - lib/trials/utils/jsons.rb
 - lib/trials/utils/logging.rb
 - lib/trials/utils/pdfs.rb
data/lib/trials/data_handling/addresses.rb DELETED
@@ -1,37 +0,0 @@
-# normal format <number> <STREET NAME>, <CITY>, <STATE> <postal>
-
-def normalize_address(address)
-  return if address.blank?
-
-  cleaner_string = if address.respond_to?(:address1)
-    "#{address.address1}, #{address.city}, #{address.state} #{address.postal_code}"
-  else
-    address
-  end
-
-  cleaner_string = cleaner_string.gsub('#', ' #').squish
-  parsed = StreetAddress::US.parse(cleaner_string)
-
-  return if parsed.blank?
-  return if parsed.number.blank?
-  return if parsed.street.blank?
-  return if parsed.city.blank?
-  return if parsed.state.blank?
-  return if parsed.postal_code.blank?
-
-  parsed.prefix = nil
-  parsed.suffix = nil
-  parsed.unit_prefix = nil
-  parsed.unit = nil
-  parsed.postal_code_ext = nil
-
-  parsed.to_s.upcase
-end
-
-def parse_address(address_string)
-  StreetAddress::US.parse(address_string)
-end
-
-def normalize_and_parse_address(address_string)
-  StreetAddress::US.parse(normalize_address(address_string))
-end
data/lib/trials/data_handling/names.rb DELETED
@@ -1,84 +0,0 @@
-# normal <FIRST> <LAST>
-
-Name = Struct.new(:first_name, :middle_name, :last_name)
-
-class NamePartsParser
-  def initialize(name_string)
-    @namae = Namae.parse((name_string || '').upcase).first
-  end
-
-  def first
-    normalize_name(split_first_and_middle(given).first)
-  end
-
-  def middle
-    normalize_name(split_first_and_middle(given).last)
-  end
-
-  def last
-    normalize_name(family)
-  end
-
-  private
-
-  def given
-    @namae&.given || ''
-  end
-
-  def family
-    @namae&.family || ''
-  end
-
-  def split_first_and_middle(first_and_middle)
-    names = first_and_middle.split(' ', 2)
-    names.length == 1 ? names + [''] : names
-  end
-end
-
-def normalize_name(name)
-  return if name.blank?
-
-  name = name_from_parts(name) if name.respond_to?(:first_name)
-
-  name.strip.upcase.delete('^A-Z\ \-').squeeze(" ")
-end
-
-def normalize_full_names(names)
-  Array.wrap(names)
-    .map { |n| normalize_full_name(n) }
-    .map(&:to_s)
-    .map(&:presence)
-    .compact
-    .uniq
-end
-
-def normalize_full_name(name)
-  return if name.blank?
-
-  name.strip.upcase.delete('^A-Z\ \-').squeeze(" ")
-end
-
-def parse_name(name)
-  NamePartsParser.new(name)
-end
-
-def normalize_and_parse_name(name)
-  NamePartsParser.new(normalize_name(name))
-end
-
-# private
-
-def name_from_parts(name)
-  first = normalize_name_part(name.first_name)
-  last = normalize_name_part(name.last_name)
-
-  [first, last].join(' ')
-end
-
-def full_name_from_parts(name)
-  first = normalize_name_part(name.first_name)
-  middle = normalize_name_part(name.middle_name)
-  last = normalize_name_part(name.last_name)
-
-  [first, middle, last].join(' ')
-end
data/lib/trials/utils/hashes.rb DELETED
@@ -1,90 +0,0 @@
-def uniq_hash_keys(hashes)
-  hashes.flat_map(&:keys).uniq.compact
-end
-
-def sanitize_hash_value(hash, key:, type:, date_format: '%Y-%m-%d')
-  hash.merge(
-    key => or_nil do
-      case type
-      when :date
-        Date.strptime(hash.dig(key), date_format)
-      when :datetime
-        DateTime.parse(hash.dig(key))
-      when :integer, :int
-        hash.dig(key).to_i
-      when :float
-        hash.dig(key).to_f
-      when :string
-        hash.dig(key).to_s
-      when :alphanum
-        string_to_alphanum(hash.dig(key))
-      when :present?
-        hash.dig(key).present?
-      end
-    end
-  )
-end
-
-def sanitize_hash_values(hash, scheme = {})
-  scheme.each do |k, v|
-    hash = sanitize_hash_value(hash, key: k, type: v)
-  end
-
-  hash
-end
-
-def rename_hash_key(hash, from:, to:)
-  hash[to] = hash.delete(from)
-  hash
-end
-
-def rename_hash_keys(hash, scheme = {})
-  scheme.each do |k, v|
-    hash = rename_hash_key(hash, from: k, to: v)
-  end
-
-  hash
-end
-
-def merge_hash_groups(*groups, key:, join_type: :inner)
-  groups = groups.map { |group| group.map { |g| [g.dig(key), g] }.to_h }
-
-  keys = begin
-    case join_type
-    when :inner
-      groups.map(&:keys).reduce(&:&)
-    when :all
-      groups.flat_map(&:keys).uniq
-    when :first
-      groups.first.keys
-    end
-  end
-
-  keys.map { |key| groups.map { |g| g.dig(key) }.compact.reduce(&:merge) }
-end
-
-def count_for_group_by(batch, &block)
-  batch
-    .group_by(&block)
-    .map { |k, v| [k, v.length] }
-    .to_h
-end
-
-def array_to_count_hash(list)
-  list.uniq.reduce({}) do |h, i|
-    h[i] = list.count(i)
-    h
-  end
-end
-
-def update_counts_hash(counts, update)
-  update.each do |k, v|
-    if counts.key?(k)
-      counts[k] += v
-    else
-      counts[k] = v
-    end
-  end
-
-  counts
-end