incsv 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f1fc0abb9bb7e012da9f2be37f4fa79b1b45f371
4
- data.tar.gz: 7bde51d01a7db2e16f9805b45dc4391703b64792
3
+ metadata.gz: a48352c382c9de59e29eb0f294f75f68cc7f8de0
4
+ data.tar.gz: 749abf80150e9181ef61c0d85746ed685459a754
5
5
  SHA512:
6
- metadata.gz: 1b23fe97b8a38cc505ddf64c795798941a21b347d882b8976c3f68e1ff768fb1b8e1acfd600172c617af78d8563a2b2877f641308f3357ac8764fde95329f1b2
7
- data.tar.gz: 6dd334b65735ba34da45ac131de1bf8220081f2292521a37be4c488a017de4db697d9d2b56dc0d220a0dedba4c70ed1ebc82bd3168e28eb8457d0947c4381d96
6
+ metadata.gz: e9b77efaf5628776671a0c113f8082335702f33ececab4e62c9f21cd1013d65bbc12f2be5b7e8f769cad9257ec2d5387ea6655b561a1803cc142613bc44e762f
7
+ data.tar.gz: 909a26477f6065c4a7ff04bc54f38e74c9a6fff4d32e94d73bb15b38f841ace0f120ac483b7b3e89daa9008272a117d64a9c72c4d3452694f455e865c3830aea
data/.yardopts ADDED
@@ -0,0 +1,6 @@
1
+ --protected
2
+ --no-private
3
+ --exclude /spec/
4
+ --exclude /bin/
5
+ -
6
+ README.md
data/README.md CHANGED
@@ -7,8 +7,8 @@ It works by loading the CSV into an [SQLite][] database and then
7
7
  dropping you into an interactive Ruby shell. You can then use the
8
8
  [Sequel][] database library to perform further exploratory analysis.
9
9
 
10
- [sqlite]: https://www.sqlite.org/
11
- [sequel]: http://sequel.jeremyevans.net/
10
+ [SQLite]: https://www.sqlite.org/
11
+ [Sequel]: http://sequel.jeremyevans.net/
12
12
 
13
13
  ## Installation
14
14
 
@@ -38,7 +38,7 @@ A quick example:
38
38
  {:name=>"enhanced targeting card"},
39
39
  {:name=>"Giddyup Buttercup"}]
40
40
 
41
- [repl]: https://en.wikipedia.org/wiki/Read%E2%80%93eval%E2%80%93print_loop
41
+ [REPL]: https://en.wikipedia.org/wiki/Read%E2%80%93eval%E2%80%93print_loop
42
42
 
43
43
  ### The less-quick version
44
44
 
data/exe/incsv CHANGED
@@ -8,6 +8,9 @@ require "pry"
8
8
  require "incsv"
9
9
 
10
10
  module InCSV
11
+ # A cut-down class, the binding of which is used for the REPL console.
12
+ # Any methods and instance variables defined here, therefore, are
13
+ # accessible from the console.
11
14
  class Console
12
15
  def initialize(db)
13
16
  @db = db
@@ -18,6 +21,7 @@ module InCSV
18
21
  end
19
22
  end
20
23
 
24
+ # The command-line interface to InCSV.
21
25
  class CLI < Thor
22
26
  desc "create CSV_FILE", "Creates a database file with the appropriate schema for the given CSV file, but doesn't import any data."
23
27
  method_option :force, type: :boolean, default: false
data/incsv.gemspec CHANGED
@@ -22,6 +22,7 @@ Gem::Specification.new do |spec|
22
22
  spec.add_development_dependency "bundler", "~> 1.11"
23
23
  spec.add_development_dependency "rake", "~> 10.0"
24
24
  spec.add_development_dependency "rspec", "~> 3.0"
25
+ spec.add_development_dependency "yard", "~> 0.8"
25
26
 
26
27
  spec.add_runtime_dependency "thor", "~> 0.19.1"
27
28
  spec.add_runtime_dependency "pry", "~> 0.10"
@@ -1,9 +1,28 @@
1
1
  module InCSV
2
+ # An abstract class, inherited by all types of column. Specifies the
3
+ # interface that all these classes must adhere to.
2
4
  class ColumnType
5
+ # A symbol representation of what type of data this ColumnType
6
+ # represents. By default this is taken from the class name (so this
7
+ # class would be :columntype).
3
8
  def self.name
4
9
  self.to_s.sub(/.*::/, "").downcase.to_sym
5
10
  end
6
11
 
12
+ # The type of the column from the perspective of the database. By
13
+ # default this is derived from the class name, so a subclass named
14
+ # Foo that does not override this method would use :foo.
15
+ #
16
+ # Possible column types can be found here:
17
+ #
18
+ # http://sequel.jeremyevans.net/rdoc/files/doc/schema_modification_rdoc.html#label-Column+types
19
+ #
20
+ # This can also be a string, for database-specific features or in
21
+ # order to specify lengths easily. Examples might be:
22
+ #
23
+ # VARCHAR(255)
24
+ # DECIMAL(10, 2)
25
+ # BOOLEAN
7
26
  def self.for_database
8
27
  self.to_s.sub(/.*::/, "").downcase.to_sym
9
28
  end
@@ -12,14 +31,19 @@ module InCSV
12
31
  @value = value
13
32
  end
14
33
 
34
+ # Returns true if the given value (supplied in the constructor)
35
+ # is of the type represented by this column; returns false
36
+ # otherwise.
15
37
  def match?
16
38
  false
17
39
  end
18
40
 
41
+ # Returns a cleaned/preprocessed version of the given value.
19
42
  def clean_value
20
43
  self.class.clean_value(@value)
21
44
  end
22
45
 
46
+ # Returns a cleaned/preprocessed version of an arbitrary value.
23
47
  def self.clean_value(value)
24
48
  value
25
49
  end
@@ -3,6 +3,9 @@ require "sequel"
3
3
  require "pathname"
4
4
 
5
5
  module InCSV
6
+ # Represents a database file, handling the creation of the database
7
+ # and of the table within the database, as well as the importing of
8
+ # data from a CSV file into the database.
6
9
  class Database
7
10
  def initialize(csv)
8
11
  @csv = csv
@@ -14,29 +17,47 @@ module InCSV
14
17
 
15
18
  attr_reader :db
16
19
 
20
+ # Returns true if the primary database table within the database has
21
+ # been created.
17
22
  def table_created?
18
23
  @db.table_exists?(table_name)
19
24
  end
20
25
 
26
+ # Returns true if there is data in the primary table. There are
27
+ # perhaps more accurate ways to calculate this, but only by
28
+ # comparing samples from the CSV to the table; this is faster and
29
+ # will in practice be accurate.
21
30
  def imported?
22
31
  table_created? && @db[table_name].count > 0
23
32
  end
24
33
 
34
+ # Returns true if the database file exists; makes no effort to check
35
+ # whether it is in fact a valid SQLite database.
25
36
  def exists?
26
37
  File.exist?(db_path)
27
38
  end
28
39
 
40
+ # Returns the path to the database file, generated based on the
41
+ # filename of the CSV passed to the class. For example, a CSV called
42
+ # `products.csv` will be stored in a database called `products.db`
43
+ # in the same directory.
29
44
  def db_path
30
45
  path = Pathname(csv)
31
46
  (path.dirname + (path.basename(".csv").to_s + ".db")).to_s
32
47
  end
33
48
 
49
+ # Returns the table name, by default generated based on the filename
50
+ # of the CSV. For example, a CSV called `products.csv` will produce
51
+ # a table called `products`.
34
52
  def table_name
35
53
  @table_name ||= begin
36
54
  File.basename(csv, ".csv").downcase.gsub(/[^a-z_]/, "").to_sym
37
55
  end
38
56
  end
39
57
 
58
+ # Creates a table in the database, with one column in the database
59
+ # for each column in the CSV, the type of which is the best guess
60
+ # for the data found in that column in the CSV data.
40
61
  def create_table
41
62
  @db.create_table!(table_name) do
42
63
  primary_key :_incsv_id
@@ -49,6 +70,11 @@ module InCSV
49
70
  end
50
71
  end
51
72
 
73
+ # Imports data from the CSV file into the database, applying any
74
+ # preprocessing specified by the column type (e.g. stripping
75
+ # currency prefixes).
76
+ #
77
+ # Data is imported in transactions, in chunks of 200 rows at a time.
52
78
  def import
53
79
  return if imported?
54
80
 
data/lib/incsv/schema.rb CHANGED
@@ -1,11 +1,15 @@
1
1
  require "csv"
2
2
 
3
3
  module InCSV
4
+ # Given a CSV file, samples data from it in order to establish what
5
+ # data types its columns are.
4
6
  class Schema
5
7
  def initialize(csv)
6
8
  @csv = csv
7
9
  end
8
10
 
11
+ # Returns the column types found in the CSV. Memoises the result, so
12
+ # can be called repeatedly.
9
13
  def columns
10
14
  @columns ||= parsed_columns
11
15
  end
@@ -14,6 +18,10 @@ module InCSV
14
18
 
15
19
  attr_reader :csv
16
20
 
21
+ # Returns an array with one element for each column in the CSV. The
22
+ # value is a Column object, which has responsibility for determining
23
+ # the type of the data stored in the column; a sample of 50 rows
24
+ # from the column is provided to the Column class for this purpose.
17
25
  def parsed_columns
18
26
  samples(50).map do |name, values|
19
27
  Column.new(name, values)
@@ -1,16 +1,30 @@
1
1
  module InCSV
2
2
  module Types
3
+ # Represents a currency value, without its symbol/identifier, stored
4
+ # as a DECIMAL(10, 2) to avoid rounding errors.
5
+ #
6
+ # Not storing the identifier is an issue that should be resolved at
7
+ # some point, ideally; it's obviously an issue in files that have
8
+ # multiple currencies in the same column.
3
9
  class Currency < ColumnType
10
+ # A regular expression which matches all supported currency types.
4
11
  MATCH_EXPRESSION = /\A(\$|£)([0-9,\.]+)\z/
5
12
 
13
+ # What type of column to create in the database.
6
14
  def self.for_database
7
15
  "DECIMAL(10,2)"
8
16
  end
9
17
 
18
+ # Returns a truthy MatchData if the given value is a supported
19
+ # currency type, or nil otherwise.
10
20
  def match?
11
21
  value.strip.match(MATCH_EXPRESSION)
12
22
  end
13
23
 
24
+ # Strip the currency symbol, and remove any comma separators. This
25
+ # creates an issue with locales other than English, in which
26
+ # commas are used for decimal points, but this will work for
27
+ # English.
14
28
  def self.clean_value(value)
15
29
  return unless value
16
30
 
@@ -1,5 +1,7 @@
1
1
  module InCSV
2
2
  module Types
3
+ # Represents an ISO 8601-format date without any timestamp element,
4
+ # e.g. 2016-01-01.
3
5
  class Date < ColumnType
4
6
  def match?
5
7
  value.strip.match(/\A[0-9]{4}-[0-9]{2}-[0-9]{2}\z/)
@@ -1,6 +1,15 @@
1
1
  module InCSV
2
2
  module Types
3
+ # Represents a String, stored in the database as TEXT.
4
+ # This is the fallback datatype, used for anything that doesn't
5
+ # match any of the other more specific types. Its matching logic is
6
+ # therefore simple: it matches anything. For this reason it must be
7
+ # matched last; this is achieved via require order.
3
8
  class String < ColumnType
9
+ def self.for_database
10
+ "TEXT"
11
+ end
12
+
4
13
  def match?
5
14
  true
6
15
  end
data/lib/incsv/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module InCSV
2
- VERSION = "0.2.1"
2
+ VERSION = "0.2.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: incsv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Miller
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-02-22 00:00:00.000000000 Z
11
+ date: 2016-03-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: yard
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.8'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.8'
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: thor
57
71
  requirement: !ruby/object:Gem::Requirement
@@ -120,6 +134,7 @@ files:
120
134
  - ".gitignore"
121
135
  - ".rspec"
122
136
  - ".travis.yml"
137
+ - ".yardopts"
123
138
  - CODE_OF_CONDUCT.md
124
139
  - Gemfile
125
140
  - LICENSE.txt
@@ -164,3 +179,4 @@ signing_key:
164
179
  specification_version: 4
165
180
  summary: A tool for interrogating CSV data using SQLite and Sequel.
166
181
  test_files: []
182
+ has_rdoc: