csv_orm 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9bb9dcf9aaa806b3e787579a8c0d3d058ef95ac4
4
- data.tar.gz: 1ea60c99b878d4b63de6ec17666e0ac90f309759
3
+ metadata.gz: 40f766dd6da056cc5ab425d390c33a86783b1c1f
4
+ data.tar.gz: da13ec654fac0cfddb2375f274f6f0f11f099ed4
5
5
  SHA512:
6
- metadata.gz: 7d63c9a5774ec628d26fd88b823dfdb136efa7d4a875fbe69538fa79c46654fe125ea05e153c2889cd3b118f786bb5d6c076b30d17caba3e869d0a2fdcb645b2
7
- data.tar.gz: d1ae0d13d08b6ccfbc89bf262cbc16eabacbf4d8fb55e9784ff73c7e2a304cecd78566d3d2e343e0c77e435a12dd0b73e4776bd3f490c4a42e44ae4e0f30e13b
6
+ metadata.gz: aba4290e99c36ebd87b7cf0748cc25deb18c278e33346c6e71717bf6496501ca286f6ee2b733efbb05ae6e1c837320af21b92d96a806a6ed3fc7286933005aa0
7
+ data.tar.gz: 10d975ca6d8985ee36e044415ad1a95604ea6093cfbd973536beba0d9e98d10cabed1fa3b623aad709a9e6e632ec582d807996637b46bda686d5862fd14aa4ac
data/README.md CHANGED
@@ -28,25 +28,42 @@ Or install it yourself as:
28
28
  Now you can do ActiveRecord-like queries on the dataset. Currently it supports 3 methods:
29
29
 
30
30
  ```ruby
31
- #where_any({key: 'value', other_key: 'other_value'})
32
- #where_all({key: 'value', other_key: 'other_value'})
33
- #aggregate(:field1, :field2)
31
+ #where({key: 'value', other_key: 'other_value'}) (all conditions must be met)
32
+ #where_any({key: 'value', other_key: 'other_value'}) (like an 'or' condition)
33
+ #aggregate(:field1, :field2) (count unique values for each field)
34
34
  ```
35
35
 
36
36
  ```ruby
37
- # show users who have have any admin access
38
- my_data.where_any({admin: true, super_admin: true})
39
-
40
37
  # show users who are admin and named 'Mike'
41
- my_data.where_all({admin: true, first_name: 'Mike'})
38
+ my_data.where({admin: true, first_name: 'Mike'})
39
+
40
+ # show users who have admin access or have last name N-Z
41
+ my_data.where_any({admin: true, last_name: /^[n-zN-Z].*/})
42
42
 
43
43
  # give me a breakdown of orders by their delivery status for users named 'Mike'
44
- my_data.where_all({first_name: 'Mike'}).aggregate(:delivery_status)
45
- #=> {delivery_status: {placed: 10, processing: 22, shipped: 43, delivered: 8}}
44
+ my_data.where({first_name: 'Mike'}).aggregate(:delivery_status, :admin)
45
+ #=> {
46
+ delivery_status: {placed: 10, processing: 22, shipped: 43, delivered: 25},
47
+ admin: { true: 45, false: 55 }
48
+ }
46
49
  ```
47
50
 
48
51
  Maybe more will come...
49
52
 
53
+ Ok a little more...
54
+
55
+ Now supporting an option: `{smart: false}`. By default, time fields are converted internally on ingestion, so a string like `'2018-01-01'` is converted to `1514764800`. If you pass in `smart: false`, it will not try to convert time fields and will keep them as strings.
56
+
57
+ Also supporting multiple data types as queryable criteria. You can now do...
58
+
59
+ ```ruby
60
+ data.where({string_field: 'foo'}) # exact match
61
+ data.where({string_field: /\d+/}) # pattern match
62
+ data.where({array_field: ['a', 'b']}) # is one of
63
+ data.where({range_field: (start_day..end_day)}) # is within range, supports numbers/dates
64
+ ```
65
+
66
+
50
67
  ## Development
51
68
 
52
69
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
data/csv_orm-0.1.0.gem ADDED
Binary file
@@ -2,12 +2,13 @@ module CsvOrm
2
2
  class Ingestor
3
3
  attr_accessor :file, :path, :headers, :headers_defined, :data_set
4
4
 
5
- def initialize(file_path)
5
+ def initialize(file_path, options={})
6
6
  @path = File.expand_path(file_path)
7
7
  @file = File.open(path)
8
8
  @headers = [] # will define in first iteration of loop
9
9
  @headers_defined = false
10
10
  @data_set = []
11
+ @smart = options[:smart] == false ? false : true
11
12
  end
12
13
 
13
14
  def parse
@@ -15,10 +16,21 @@ module CsvOrm
15
16
  unless @headers_defined
16
17
  @headers = row.map {|header| header.gsub(' ', '_').downcase.to_sym }
17
18
  end
18
- @data_set << OpenStruct.new(Hash[headers.zip(row.map(&:to_s))]) if @headers_defined
19
+ parsed_row = row.map {|field| infer_data_type(field) }
20
+ @data_set << OpenStruct.new(Hash[headers.zip(parsed_row)]) if @headers_defined
19
21
  @headers_defined = true
20
22
  end
21
23
  @data_set
22
24
  end
25
+
26
+ def infer_data_type(field)
27
+ # currently supporting time to integer conversion
28
+ return field.to_s unless @smart
29
+ date = DateTime.parse(field) rescue nil
30
+ if date
31
+ return date.to_time.to_i
32
+ end
33
+ field.to_s
34
+ end
23
35
  end
24
36
  end
data/lib/csv_orm/query.rb CHANGED
@@ -22,18 +22,70 @@ module CsvOrm
22
22
  agg
23
23
  end
24
24
 
25
+ def explain(method, args)
26
+ case
27
+ when method == :where
28
+ build_expression('&&', args)
29
+ when method == :where_any
30
+ build_expression('||', args)
31
+ else
32
+ 'not supported at this time'
33
+ end
34
+ end
35
+
25
36
  def where_any(attrs)
26
37
  expression = build_expression('||', attrs)
27
38
  self.class.new(@data.select {|row| eval(expression)});
28
39
  end
29
40
 
30
- def where_all(attrs)
41
+ def where(attrs)
31
42
  expression = build_expression('&&', attrs)
32
43
  self.class.new(@data.select {|row| eval(expression)});
33
44
  end
34
45
 
46
+ def not(attrs)
47
+ expression = build_expression('&&', attrs)
48
+ self.class.new(@data.reject {|row| eval(expression)});
49
+ end
50
+
51
+ def parse_range_values(range)
52
+ exclude = range.exclude_end?
53
+ first, last = DateTime.parse(range.begin), DateTime.parse(range.end) rescue nil
54
+
55
+ if first && last
56
+ Range.new(first.to_time.to_i, last.to_time.to_i, exclude)
57
+ else
58
+ range
59
+ end
60
+ end
61
+
62
+ def build_expression_part(key, value)
63
+ case
64
+ when value.class == String
65
+ "row.send(:#{key}) == '#{value}'"
66
+ when value.class == Regexp
67
+ "row.send(:#{key}).match(/#{value.source}/)"
68
+ when value.class == Array
69
+ "#{value}.include?(row.send(:#{key}))"
70
+ when value.class == Range
71
+ parsed_range_value = parse_range_values(value)
72
+ "(#{parsed_range_value}).cover?(row.send(:#{key}))"
73
+ when [TrueClass, FalseClass].include?(value.class)
74
+ "row.send(:#{key}) == '#{value.to_s}'"
75
+ end
76
+ end
77
+
35
78
  def build_expression(conditional, attrs)
36
- attrs.each_with_object([]) { |(k,v), array| array << "row.send(:#{k}) == '#{v}'" }.join(" #{conditional} ")
79
+ string = ''
80
+ is_first = true
81
+
82
+ attrs.each do |k, v|
83
+ string << " #{conditional} " unless is_first
84
+ string << build_expression_part(k, v)
85
+
86
+ is_first = false
87
+ end
88
+ string
37
89
  end
38
90
  end
39
91
  end
@@ -1,3 +1,3 @@
1
1
  module CsvOrm
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv_orm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mike Lerner
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-02-24 00:00:00.000000000 Z
11
+ date: 2019-02-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -112,6 +112,7 @@ files:
112
112
  - Rakefile
113
113
  - bin/console
114
114
  - bin/setup
115
+ - csv_orm-0.1.0.gem
115
116
  - csv_orm.gemspec
116
117
  - lib/csv_orm.rb
117
118
  - lib/csv_orm/ingestor.rb