slyce 1.1.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (7) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +65 -1
  3. data/bin/slyce +1 -1
  4. data/bin/slyce3 +0 -2
  5. data/bin/slyced +147 -0
  6. data/slyce.gemspec +1 -0
  7. metadata +18 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6a2d1af8d0c1835beb4e57d013704dc791ac769a71ed1c507b1f1a39be4b2ace
4
- data.tar.gz: 0eb2255e8effcb0a12a72cb3dc020ebe35943f73b181f511ec865e168604b699
3
+ metadata.gz: f40b8dfb3127b82f3c9ea3877afb4be3e2c1e93a70fc6308bd4710978eb8299a
4
+ data.tar.gz: d328c938fcb4b833aed2a8c2772ce2eebc340aa6c620211150abaf29e6fae7a1
5
5
  SHA512:
6
- metadata.gz: 3c8c2e3ae1862d9a549a38f01fc2f438d736a85ff08f6a072e18894b93a8081317fa50293f382bbadd4b64ba21f8a37cbd3b0c383f8c2e0dfdb2cf16c966c49a
7
- data.tar.gz: d3895d08a1c90e110906f29d74fd0623e715b933c1b5de7a369a8f47d149bc955e22e79ee17ebee0209dc429db6683535baed77dc59f874a91438309b1e17922
6
+ metadata.gz: 8bb4e9abe474a99ff73833a079faa0b1d485274ef5b6ff146b48114b207d997cbe4b9590e0bad87da628f263ec8c01c73e0138be760708fb4a5023d63a16a1af
7
+ data.tar.gz: a920d2313061b4a6275ac6a0392ba6197f003482f011a1de80040f35f9db72fd240d612b2c94bd30f918f96c5736d02dab9691a7955c580a05c72f3e4fec4708
data/README.md CHANGED
@@ -1,2 +1,66 @@
1
1
  # slyce
2
- Ruby utility to show data statistics for MySQL databases
2
+
3
+ Ruby utility to show summary statistics or export data from MySQL, SQLite, or DuckDB.
4
+
5
+ ## Supported platforms
6
+
7
+ ### MySQL
8
+
9
+ ### SQLite
10
+
11
+ ### DuckDB
12
+
13
+ Ensure the `duckdb` gem is installed via:
14
+
15
+ `gem install duckdb`
16
+
17
+ ## Example
18
+
19
+ The following will open `my-database.duck` and export the first 10 rows of the
20
+ `members` table whose `last_name` field equals `Miller`, extracting these six fields:
21
+ `id,first_name,city,state,zip,medical_plan`. The output is written as CSV to `STDOUT`.
22
+
23
+ ```slyced my-database.duck members -r 10 -w "last_name='Miller'" -xid,first_name,city,state,zip,medical_plan --csv```
24
+
25
+ The following will show the most frequent 5 values from the `temp_table` table
26
+ in the `nearsite` database for these fields: `ef_id,external_id,insurance_plan`.
27
+ In addition, only rows that satisfy the condition given with the `-w` option are counted.
28
+
29
+ ```slyce nearsite temp_table -s 10 -w "ef_id>=251 and benefit_status='X'" -x ef_id,external_id,insurance_plan -r 5```
30
+
31
+ The resulting table looks like:
32
+
33
+ ```text
34
+ ef_id
35
+ =====
36
+ 781 255
37
+ 35 253
38
+ 31 510
39
+ 19 251
40
+ 5 258
41
+ -----
42
+ 871 shown (top 5)
43
+ 879 total (all 10)
44
+
45
+ external_id
46
+ ===========
47
+ 816 NULL
48
+ 1 1235
49
+ 1 1266
50
+ 1 1320
51
+ 1 1548
52
+ -----
53
+ 820 shown (top 5)
54
+ 879 total (all 64)
55
+
56
+ insurance_plan
57
+ ==============
58
+ 176 Aetna PPO
59
+ 144 Aetna HSA
60
+ 103 NULL
61
+ 82 Kaiser Northern CA
62
+ 73
63
+ -----
64
+ 578 shown (top 5)
65
+ 879 total (all 35)
66
+ ```
data/bin/slyce CHANGED
@@ -11,7 +11,7 @@ dbas = nil
11
11
  tabl = nil
12
12
 
13
13
  OptionParser.new.instance_eval do
14
- @version = "1.1.0"
14
+ @version = "1.2.1"
15
15
  @banner = "usage: #{program_name} [options] <database> <table>"
16
16
 
17
17
  on "--csv" , "Output comma separated values"
data/bin/slyce3 CHANGED
@@ -9,8 +9,6 @@
9
9
  # wget https://github.com/nalgeon/sqlean/releases/download/0.19.3/sqlean-macos-arm64.zip
10
10
  # unzip sqlean-macos-arm64.zip regexp.dylib
11
11
 
12
- VERSION="1.0.2"
13
-
14
12
  STDOUT.sync = true
15
13
 
16
14
  require "extralite"
data/bin/slyced ADDED
@@ -0,0 +1,147 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ STDOUT.sync = true
4
+
5
+ require "duckdb"
6
+ require "optparse"
7
+
8
# Leave with just a newline when the user interrupts the program (Ctrl-C).
trap("INT") { abort "\n" }

dbas = nil
tabl = nil

# Declare the command-line switches and parse ARGV. Recognised values are
# stored in `opts` under their long option name (e.g. opts[:rows]). The
# trailing rescue turns any parsing error into a one-line abort message.
OptionParser.new.instance_eval do
  @version = "1.2.1"
  @banner  = "usage: #{program_name} [options] <database> <table>"

  on       "--csv"                     , "Output comma separated values"
  on       "--psv"                     , "Output pipe separated values"
  on       "--tsv"                     , "Output tab separated values"
  on "-a", "--ascii"                   , "Convert data to ASCII using AnyAscii"
  on "-c", "--columns"                 , "Display column names and quit"
  # Kernel.abort is spelled out because a bare `abort` inside instance_eval
  # would resolve to OptionParser's own method.
  on "-h", "--help"                    , "Show help and command usage" do Kernel.abort to_s; end
  on "-n", "--natural"                 , "Sort naturally, not numerically"
  on "-r", "--rows <count>"            , "Rows of data to select", Integer
  on "-s", "--suppress"                , "Suppress header when exporting delimited files"
  on "-v", "--version"                 , "Show version number" do Kernel.abort "#{program_name} #{@version}"; end
  # The example is double-quoted on the outside so the inner SQL string
  # literal keeps balanced single quotes and can be pasted into a shell.
  on "-w", "--where <cond>"            , "Where clause (eg - \"age>50 and state='AZ'\")"
  on "-x", "--extract <col1,col2,...>" , "Comma separated list of columns to extract"

  self
end.parse!(into: opts={}) rescue abort($!.message)

xcsv = opts[:csv]
xpsv = opts[:psv]
xtsv = opts[:tsv]
# `=` binds tighter than `and`: xprt gets the flag value, and the delimited
# writer library is loaded only when one of the export formats was requested.
xprt = xcsv || xpsv || xtsv and require "censive"

asky = opts[:ascii   ] and require "any_ascii"
# Turn the raw condition into a complete SQL where clause (nil when absent).
filt = opts[:where   ] and filt = "where\n #{filt}"
hide = opts[:suppress]
natu = opts[:natural ]
show = opts[:rows    ]
# Requested column names are lower-cased before they are matched later on.
want = opts[:extract].to_s.downcase.split(",")

# The two positional arguments left after option parsing.
dbas ||= ARGV.shift or abort "no database given"
tabl ||= ARGV.shift or abort "no table given"

[xcsv, xpsv, xtsv].compact.size > 1 and abort "only one of csv, psv, or tsv allowed"
49
+
50
+ # ==[ Helpers ]==
51
+
52
# Convenience additions to DuckDB connections: `sql` is another name for
# `query`, and `sql!` echoes the statement before running it.
class DuckDB::Connection
  alias sql query

  # Print the statement under a banner (whitespace-trimmed, followed by ";"),
  # then run it through `sql`, forwarding all remaining arguments and the block.
  def sql!(stmt, *args, **, &)
    banner = "\n==[ SQL statement ]==\n\n"
    puts banner, stmt.strip, ";"
    sql(stmt, *args, **, &)
  end
end
60
+
61
# Print a frequency table for one column to standard output.
#
# name - column name, used as the heading (underlined with "=")
# data - array of [count, value] rows, printed in the order given
# show - row limit that was requested; when falsy the "shown" line is omitted
# uniq - number of distinct values in the column
# tots - total number of rows counted; its printed width sets the count column
def display(name, data, show, uniq, tots)
  seen = data.sum { |cnt, _val| cnt }   # rows accounted for by the listed values
  rows = [data.size, seen].min
  wide = tots.to_s.size
  fill = " " * wide
  line = "=" * name.size

  puts "\n#{fill} #{name}\n#{fill} #{line}\n"
  data.each do |cnt, val|
    # Only a real nil is rendered as NULL; a boolean false must print as "false".
    puts "%*d %s" % [wide, cnt, val.nil? ? "NULL" : val]
  end
  puts "#{fill} -----\n"
  puts "%*d shown (top %d)" % [wide, seen, rows] if show && rows > 1

  if uniq > 1
    puts "%*d total (all %d)" % [wide, tots, uniq]
  else
    puts "%*d total" % [wide, tots]
  end
end
75
+
76
+ # ==[ Let 'er rip! ]==
77
+
78
# Open the database given on the command line, then read the table's column
# names from information_schema.columns in ordinal order.
+ conn = DuckDB::Database.open(dbas).connect
79
+ resu = conn.query(<<~end)
80
+ select column_name
81
+ from information_schema.columns
82
+ where table_name='#{tabl}'
83
+ order by ordinal_position
84
+ end
85
+ cols = resu.to_a.flatten
86
# With no -x list every column is used; otherwise only the requested names
# that actually exist in the table are kept, in the order they were requested.
+ want = want.empty? ? cols : want & cols
87
+
88
# -c/--columns: print the column names and stop.
+ if opts[:columns]
89
+ puts cols
90
+ exit
91
+ end
92
+
93
+ if want.empty?
94
+ abort "no columns are selected"
95
+ end
96
+
97
+ # handle exports
98
# Export mode (one of --csv/--psv/--tsv): select the wanted columns, apply the
# optional where clause and row limit, write the rows as delimited text, exit.
+ if xprt
99
+ list = want.map {|item| "\"#{item}\"" }.join(", ")
100
+ stmt = show ? "limit #{show}" : ""
101
+ data = conn.sql(<<~"" + stmt).to_a
102
+ select
103
+ #{list}
104
+ from
105
+ "#{tabl}"
106
+ #{filt}
107
+
108
# Separator follows the chosen format flag.
# NOTE(review): the abort branch interpolates seps while it is still being
# assigned, so it would print nil; it looks unreachable since xprt is only
# truthy when one of the three flags is set.
+ seps = xcsv ? "," : xtsv ? "\t" : xpsv ? "|" : abort("unknown separator #{seps.inspect}")
109
+
110
# The header row is written unless -s/--suppress was given. Every value is
# converted with to_s and, with -a/--ascii, passed through AnyAscii.transliterate.
+ Censive.writer(sep: seps) do |csv|
111
+ csv << want unless hide
112
+ data.each do |row|
113
+ csv << row.map {|e| asky ? AnyAscii.transliterate(e.to_s) : e.to_s }
114
+ end
115
+ end
116
+
117
+ exit
118
+ end
119
+
120
# Summary mode: for each selected column, count the rows per distinct value
# and print the result with display.
+ want.each do |name|
121
# Unless -n/--natural was given, the most frequent values sort first. The
# remaining order-by terms sort numeric-looking values by their numeric value,
# then use the length of zero-led digit strings and of leading digit runs,
# and finally put NULL last and order by the value itself.
+ sort = natu ? "" : "cnt desc,"
122
+ stmt = show ? "limit #{show}" : ""
123
+ data = conn.sql(<<~"" + stmt).to_a
124
+ select
125
+ count(*) as cnt,
126
+ "#{name}" as val
127
+ from
128
+ "#{tabl}"
129
+ #{filt}
130
+ group by
131
+ val
132
+ order by #{sort}
133
+ if(regexp_matches("#{name}", '^[-+]?((0|([1-9]\\d*)(\\.\\d*)?)|((0|([1-9]\\d*))\\.\\d+))$'),cast("#{name}" as double),null) nulls last,
134
+ if(regexp_matches("#{name}", '^0\\d*$'),length("#{name}"),null) nulls last,
135
+ if(regexp_matches("#{name}", '^\\d+\\D'),length(regexp_extract("#{name}",'^(\\d+)',1)),null) nulls last,
136
+ "#{name}" is null, "#{name}"
137
+
138
# uniq = number of distinct values, tots = number of rows, both under the same
# where clause; ifnull(...,0) replaces NULL so those rows are counted as well.
+ uniq, tots = conn.sql(<<~"").to_a[0]
139
+ select
140
+ count(distinct(ifnull("#{name}",0))),
141
+ count(ifnull("#{name}",0))
142
+ from
143
+ "#{tabl}"
144
+ #{filt}
145
+
146
+ display(name, data, show, uniq, tots)
147
+ end
data/slyce.gemspec CHANGED
@@ -12,6 +12,7 @@ Gem::Specification.new do |s|
12
12
  s.files = `git ls-files`.split("\n") - %w[.gitignore]
13
13
  s.executables = `cd bin && git ls-files .`.split("\n")
14
14
  s.add_runtime_dependency "any_ascii", "~> 0.3.2"
15
+ s.add_runtime_dependency "duckdb", "~> 0.7.1"
15
16
  s.add_runtime_dependency "extralite-bundle", "~> 1.25"
16
17
  s.add_runtime_dependency "mysql2", "~> 0.5"
17
18
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: slyce
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steve Shreeve
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-03-18 00:00:00.000000000 Z
11
+ date: 2023-03-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: any_ascii
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: 0.3.2
27
+ - !ruby/object:Gem::Dependency
28
+ name: duckdb
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.7.1
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.7.1
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: extralite-bundle
29
43
  requirement: !ruby/object:Gem::Requirement
@@ -57,6 +71,7 @@ email: steve.shreeve@gmail.com
57
71
  executables:
58
72
  - slyce
59
73
  - slyce3
74
+ - slyced
60
75
  extensions: []
61
76
  extra_rdoc_files: []
62
77
  files:
@@ -65,6 +80,7 @@ files:
65
80
  - README.md
66
81
  - bin/slyce
67
82
  - bin/slyce3
83
+ - bin/slyced
68
84
  - slyce.gemspec
69
85
  homepage: https://github.com/shreeve/slyce
70
86
  licenses: