incsv 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f1fc0abb9bb7e012da9f2be37f4fa79b1b45f371
4
- data.tar.gz: 7bde51d01a7db2e16f9805b45dc4391703b64792
3
+ metadata.gz: a48352c382c9de59e29eb0f294f75f68cc7f8de0
4
+ data.tar.gz: 749abf80150e9181ef61c0d85746ed685459a754
5
5
  SHA512:
6
- metadata.gz: 1b23fe97b8a38cc505ddf64c795798941a21b347d882b8976c3f68e1ff768fb1b8e1acfd600172c617af78d8563a2b2877f641308f3357ac8764fde95329f1b2
7
- data.tar.gz: 6dd334b65735ba34da45ac131de1bf8220081f2292521a37be4c488a017de4db697d9d2b56dc0d220a0dedba4c70ed1ebc82bd3168e28eb8457d0947c4381d96
6
+ metadata.gz: e9b77efaf5628776671a0c113f8082335702f33ececab4e62c9f21cd1013d65bbc12f2be5b7e8f769cad9257ec2d5387ea6655b561a1803cc142613bc44e762f
7
+ data.tar.gz: 909a26477f6065c4a7ff04bc54f38e74c9a6fff4d32e94d73bb15b38f841ace0f120ac483b7b3e89daa9008272a117d64a9c72c4d3452694f455e865c3830aea
data/.yardopts ADDED
@@ -0,0 +1,6 @@
1
+ --protected
2
+ --no-private
3
+ --exclude /spec/
4
+ --exclude /bin/
5
+ -
6
+ README.md
data/README.md CHANGED
@@ -7,8 +7,8 @@ It works by loading the CSV into an [SQLite][] database and then
7
7
  dropping you into an interactive Ruby shell. You can then use the
8
8
  [Sequel][] database library to perform further exploratory analysis.
9
9
 
10
- [sqlite]: https://www.sqlite.org/
11
- [sequel]: http://sequel.jeremyevans.net/
10
+ [SQLite]: https://www.sqlite.org/
11
+ [Sequel]: http://sequel.jeremyevans.net/
12
12
 
13
13
  ## Installation
14
14
 
@@ -38,7 +38,7 @@ A quick example:
38
38
  {:name=>"enhanced targeting card"},
39
39
  {:name=>"Giddyup Buttercup"}]
40
40
 
41
- [repl]: https://en.wikipedia.org/wiki/Read%E2%80%93eval%E2%80%93print_loop
41
+ [REPL]: https://en.wikipedia.org/wiki/Read%E2%80%93eval%E2%80%93print_loop
42
42
 
43
43
  ### The less-quick version
44
44
 
data/exe/incsv CHANGED
@@ -8,6 +8,9 @@ require "pry"
8
8
  require "incsv"
9
9
 
10
10
  module InCSV
11
+ # A cut-down class, the binding of which is used for the REPL console.
12
+ # Any methods and instance variables defined here, therefore, are
13
+ # accessible from the console.
11
14
  class Console
12
15
  def initialize(db)
13
16
  @db = db
@@ -18,6 +21,7 @@ module InCSV
18
21
  end
19
22
  end
20
23
 
24
+ # The command-line interface to InCSV.
21
25
  class CLI < Thor
22
26
  desc "create CSV_FILE", "Creates a database file with the appropriate schema for the given CSV file, but doesn't import any data."
23
27
  method_option :force, type: :boolean, default: false
data/incsv.gemspec CHANGED
@@ -22,6 +22,7 @@ Gem::Specification.new do |spec|
22
22
  spec.add_development_dependency "bundler", "~> 1.11"
23
23
  spec.add_development_dependency "rake", "~> 10.0"
24
24
  spec.add_development_dependency "rspec", "~> 3.0"
25
+ spec.add_development_dependency "yard", "~> 0.8"
25
26
 
26
27
  spec.add_runtime_dependency "thor", "~> 0.19.1"
27
28
  spec.add_runtime_dependency "pry", "~> 0.10"
@@ -1,9 +1,28 @@
1
1
  module InCSV
2
+ # An abstract class, inherited by all types of column. Specifies the
3
+ # interface that all these classes must adhere to.
2
4
  class ColumnType
5
+ # A symbol representation of what type of data this ColumnType
6
+ # represents. By default this is taken from the class name (so this
7
+ # class would be :columntype).
3
8
  def self.name
4
9
  self.to_s.sub(/.*::/, "").downcase.to_sym
5
10
  end
6
11
 
12
+ # The type of the column from the perspective of the database. By
13
+ # default this is the same as the class name, so a column of type
14
+ # String would go into the database as a :string.
15
+ #
16
+ # Possible column types can be found here:
17
+ #
18
+ # http://sequel.jeremyevans.net/rdoc/files/doc/schema_modification_rdoc.html#label-Column+types
19
+ #
20
+ # This can also be a string, for database-specific features or in
21
+ # order to specify lengths easily. Examples might be:
22
+ #
23
+ # VARCHAR(255)
24
+ # DECIMAL(10, 2)
25
+ # BOOLEAN
7
26
  def self.for_database
8
27
  self.to_s.sub(/.*::/, "").downcase.to_sym
9
28
  end
@@ -12,14 +31,19 @@ module InCSV
12
31
  @value = value
13
32
  end
14
33
 
34
+ # Returns true if the given value (supplied in the constructor)
35
+ # is of the type represented by this column; returns false
36
+ # otherwise.
15
37
  def match?
16
38
  false
17
39
  end
18
40
 
41
+ # Returns a cleaned/preprocessed version of the given value.
19
42
  def clean_value
20
43
  self.class.clean_value(@value)
21
44
  end
22
45
 
46
+ # Returns a cleaned/preprocessed version of an arbitrary value.
23
47
  def self.clean_value(value)
24
48
  value
25
49
  end
@@ -3,6 +3,9 @@ require "sequel"
3
3
  require "pathname"
4
4
 
5
5
  module InCSV
6
+ # Represents a database file, handling the creation of the database
7
+ # and of the table within the database, as well as the importing of
8
+ # data from a CSV file into the database.
6
9
  class Database
7
10
  def initialize(csv)
8
11
  @csv = csv
@@ -14,29 +17,47 @@ module InCSV
14
17
 
15
18
  attr_reader :db
16
19
 
20
+ # Returns true if the primary database table within the database has
21
+ # been created.
17
22
  def table_created?
18
23
  @db.table_exists?(table_name)
19
24
  end
20
25
 
26
+ # Returns true if there is data in the primary table. There are
27
+ # perhaps more accurate ways to calculate this, but only by
28
+ # comparing samples from the CSV to the table; this is faster and
29
+ # will in practice be accurate.
21
30
  def imported?
22
31
  table_created? && @db[table_name].count > 0
23
32
  end
24
33
 
34
+ # Returns true if the database file exists; makes no effort to check
35
+ # whether it is in fact a valid SQLite database.
25
36
  def exists?
26
37
  File.exist?(db_path)
27
38
  end
28
39
 
40
+ # Returns the path to the database file, generated based on the
41
+ # filename of the CSV passed to the class. For example, a CSV called
42
+ # `products.csv` will be stored in a database called `products.db`
43
+ # in the same directory.
29
44
  def db_path
30
45
  path = Pathname(csv)
31
46
  (path.dirname + (path.basename(".csv").to_s + ".db")).to_s
32
47
  end
33
48
 
49
+ # Returns the table name, by default generated based on the filename
50
+ # of the CSV. For example, a CSV called `products.csv` will produce
51
+ # a table called `products`.
34
52
  def table_name
35
53
  @table_name ||= begin
36
54
  File.basename(csv, ".csv").downcase.gsub(/[^a-z_]/, "").to_sym
37
55
  end
38
56
  end
39
57
 
58
+ # Creates a table in the database, with one column in the database
59
+ # for each column in the CSV, the type of which is the best guess
60
+ # for the data found in that column in the CSV data.
40
61
  def create_table
41
62
  @db.create_table!(table_name) do
42
63
  primary_key :_incsv_id
@@ -49,6 +70,11 @@ module InCSV
49
70
  end
50
71
  end
51
72
 
73
+ # Imports data from the CSV file into the database, applying any
74
+ # preprocessing specified by the column type (e.g. stripping
75
+ # currency prefixes).
76
+ #
77
+ # Data is imported in transactions, in chunks of 200 rows at a time.
52
78
  def import
53
79
  return if imported?
54
80
 
data/lib/incsv/schema.rb CHANGED
@@ -1,11 +1,15 @@
1
1
  require "csv"
2
2
 
3
3
  module InCSV
4
+ # Given a CSV file, samples data from it in order to establish what
5
+ # data types its columns are.
4
6
  class Schema
5
7
  def initialize(csv)
6
8
  @csv = csv
7
9
  end
8
10
 
11
+ # Returns the column types found in the CSV. Memoises the result, so
12
+ # it can be called repeatedly.
9
13
  def columns
10
14
  @columns ||= parsed_columns
11
15
  end
@@ -14,6 +18,10 @@ module InCSV
14
18
 
15
19
  attr_reader :csv
16
20
 
21
+ # Returns an array with one element for each column in the CSV. The
22
+ # value is a Column object, which has responsibility for determining
23
+ # the type of the data stored in the column; a sample of 50 rows
24
+ # from the column is provided to the Column class for this purpose.
17
25
  def parsed_columns
18
26
  samples(50).map do |name, values|
19
27
  Column.new(name, values)
@@ -1,16 +1,30 @@
1
1
  module InCSV
2
2
  module Types
3
+ # Represents a currency value, without its symbol/identifier, stored
4
+ # as a DECIMAL(10, 2) to avoid rounding errors.
5
+ #
6
+ # Not storing the identifier is an issue that should be resolved at
7
+ # some point, ideally; it's obviously an issue in files that have
8
+ # multiple currencies in the same column.
3
9
  class Currency < ColumnType
10
+ # A regular expression which matches all supported currency types.
4
11
  MATCH_EXPRESSION = /\A(\$|£)([0-9,\.]+)\z/
5
12
 
13
+ # What type of column to create in the database.
6
14
  def self.for_database
7
15
  "DECIMAL(10,2)"
8
16
  end
9
17
 
18
+ # Returns a truthy MatchData if the given value is a supported currency
19
+ # type, or nil otherwise.
10
20
  def match?
11
21
  value.strip.match(MATCH_EXPRESSION)
12
22
  end
13
23
 
24
+ # Strip the currency symbol, and remove any comma separators. This
25
+ # creates an issue with locales other than English, in which
26
+ # commas are used for decimal points, but this will work for
27
+ # English.
14
28
  def self.clean_value(value)
15
29
  return unless value
16
30
 
@@ -1,5 +1,7 @@
1
1
  module InCSV
2
2
  module Types
3
+ # Represents an ISO 8601-format date without any timestamp element,
4
+ # e.g. 2016-01-01.
3
5
  class Date < ColumnType
4
6
  def match?
5
7
  value.strip.match(/\A[0-9]{4}-[0-9]{2}-[0-9]{2}\z/)
@@ -1,6 +1,15 @@
1
1
  module InCSV
2
2
  module Types
3
+ # Represents a String, stored in the database as TEXT.
4
+ # This is the fallback datatype, used for anything that doesn't
5
+ # match any of the other more specific types. Its matching logic is
6
+ # therefore simple: it matches anything. For this reason it must be
7
+ # matched last; this is achieved via require order.
3
8
  class String < ColumnType
9
+ def self.for_database
10
+ "TEXT"
11
+ end
12
+
4
13
  def match?
5
14
  true
6
15
  end
data/lib/incsv/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module InCSV
2
- VERSION = "0.2.1"
2
+ VERSION = "0.2.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: incsv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Miller
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-02-22 00:00:00.000000000 Z
11
+ date: 2016-03-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: yard
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.8'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.8'
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: thor
57
71
  requirement: !ruby/object:Gem::Requirement
@@ -120,6 +134,7 @@ files:
120
134
  - ".gitignore"
121
135
  - ".rspec"
122
136
  - ".travis.yml"
137
+ - ".yardopts"
123
138
  - CODE_OF_CONDUCT.md
124
139
  - Gemfile
125
140
  - LICENSE.txt
@@ -164,3 +179,4 @@ signing_key:
164
179
  specification_version: 4
165
180
  summary: A tool for interrogating CSV data using SQLite and Sequel.
166
181
  test_files: []
182
+ has_rdoc: