slyce 1.1.0 → 1.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (7) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +65 -1
  3. data/bin/slyce +1 -1
  4. data/bin/slyce3 +0 -2
  5. data/bin/slyced +147 -0
  6. data/slyce.gemspec +1 -0
  7. metadata +18 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6a2d1af8d0c1835beb4e57d013704dc791ac769a71ed1c507b1f1a39be4b2ace
4
- data.tar.gz: 0eb2255e8effcb0a12a72cb3dc020ebe35943f73b181f511ec865e168604b699
3
+ metadata.gz: f40b8dfb3127b82f3c9ea3877afb4be3e2c1e93a70fc6308bd4710978eb8299a
4
+ data.tar.gz: d328c938fcb4b833aed2a8c2772ce2eebc340aa6c620211150abaf29e6fae7a1
5
5
  SHA512:
6
- metadata.gz: 3c8c2e3ae1862d9a549a38f01fc2f438d736a85ff08f6a072e18894b93a8081317fa50293f382bbadd4b64ba21f8a37cbd3b0c383f8c2e0dfdb2cf16c966c49a
7
- data.tar.gz: d3895d08a1c90e110906f29d74fd0623e715b933c1b5de7a369a8f47d149bc955e22e79ee17ebee0209dc429db6683535baed77dc59f874a91438309b1e17922
6
+ metadata.gz: 8bb4e9abe474a99ff73833a079faa0b1d485274ef5b6ff146b48114b207d997cbe4b9590e0bad87da628f263ec8c01c73e0138be760708fb4a5023d63a16a1af
7
+ data.tar.gz: a920d2313061b4a6275ac6a0392ba6197f003482f011a1de80040f35f9db72fd240d612b2c94bd30f918f96c5736d02dab9691a7955c580a05c72f3e4fec4708
data/README.md CHANGED
@@ -1,2 +1,66 @@
1
1
  # slyce
2
- Ruby utility to show data statistics for MySQL databases
2
+
3
+ Ruby utility to show summary statistics or export data from MySQL, SQLite, or DuckDB.
4
+
5
+ ## Supported platforms
6
+
7
+ ### MySQL
8
+
9
+ ### SQLite
10
+
11
+ ### DuckDB
12
+
13
+ Ensure the `duckdb` gem is installed via:
14
+
15
+ `gem install duckdb`
16
+
17
+ ## Example
18
+
19
+ The following opens `my-database.duck` and exports the first 10 rows
20
+ whose `last_name` field equals `Miller`, extracting the six fields listed
21
+ after `-x` (`id,first_name,...`). The output is written as CSV to `STDOUT`.
22
+
23
+ ```slyced my-database.duck members -r 10 -w "last_name='Miller'" -xid,first_name,city,state,zip,medical_plan --csv```
24
+
25
+ The following shows the 5 most frequent values from the `temp_table` table
26
+ in the `nearsite` database for each of these fields: `ef_id,external_id,insurance_plan`.
27
+ Only rows matching the condition given in the `-w` option are included.
28
+
29
+ ```slyce nearsite temp_table -s 10 -w "ef_id>=251 and benefit_status='X'" -x ef_id,external_id,insurance_plan -r 5```
30
+
31
+ The resulting table looks like:
32
+
33
+ ```text
34
+ ef_id
35
+ =====
36
+ 781 255
37
+ 35 253
38
+ 31 510
39
+ 19 251
40
+ 5 258
41
+ -----
42
+ 871 shown (top 5)
43
+ 879 total (all 10)
44
+
45
+ external_id
46
+ ===========
47
+ 816 NULL
48
+ 1 1235
49
+ 1 1266
50
+ 1 1320
51
+ 1 1548
52
+ -----
53
+ 820 shown (top 5)
54
+ 879 total (all 64)
55
+
56
+ insurance_plan
57
+ ==============
58
+ 176 Aetna PPO
59
+ 144 Aetna HSA
60
+ 103 NULL
61
+ 82 Kaiser Northern CA
62
+ 73
63
+ -----
64
+ 578 shown (top 5)
65
+ 879 total (all 35)
66
+ ```
data/bin/slyce CHANGED
@@ -11,7 +11,7 @@ dbas = nil
11
11
  tabl = nil
12
12
 
13
13
  OptionParser.new.instance_eval do
14
- @version = "1.1.0"
14
+ @version = "1.2.1"
15
15
  @banner = "usage: #{program_name} [options] <database> <table>"
16
16
 
17
17
  on "--csv" , "Output comma separated values"
data/bin/slyce3 CHANGED
@@ -9,8 +9,6 @@
9
9
  # wget https://github.com/nalgeon/sqlean/releases/download/0.19.3/sqlean-macos-arm64.zip
10
10
  # unzip sqlean-macos-arm64.zip regexp.dylib
11
11
 
12
- VERSION="1.0.2"
13
-
14
12
  STDOUT.sync = true
15
13
 
16
14
  require "extralite"
data/bin/slyced ADDED
@@ -0,0 +1,147 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ STDOUT.sync = true
4
+
5
+ require "duckdb"
6
+ require "optparse"
7
+
8
+ trap("INT" ) { abort "\n" }
9
+
10
+ dbas = nil
11
+ tabl = nil
12
+
13
+ OptionParser.new.instance_eval do
14
+ @version = "1.2.1"
15
+ @banner = "usage: #{program_name} [options] <database> <table>"
16
+
17
+ on "--csv" , "Output comma separated values"
18
+ on "--psv" , "Output pipe separated values"
19
+ on "--tsv" , "Output tab separated values"
20
+ on "-a", "--ascii" , "Convert data to ASCII using AnyAscii"
21
+ on "-c", "--columns" , "Display column names and quit"
22
+ on "-h", "--help" , "Show help and command usage" do Kernel.abort to_s; end
23
+ on "-n", "--natural" , "Sort naturally, not numerically"
24
+ on "-r", "--rows <count>" , "Rows of data to select", Integer
25
+ on "-s", "--suppress" , "Suppress header when exporting delimited files"
26
+ on "-v", "--version" , "Show version number" do Kernel.abort "#{program_name} #{@version}"; end
27
+ on "-w", "--where <cond>" , "Where clause (eg - 'age>50 and state='AZ')"
28
+ on "-x", "--extract <col1,col2,...>", "Comma separated list of columns to extract"
29
+
30
+ self
31
+ end.parse!(into: opts={}) rescue abort($!.message)
32
+
33
+ xcsv = opts[:csv]
34
+ xpsv = opts[:psv]
35
+ xtsv = opts[:tsv]
36
+ xprt = xcsv || xpsv || xtsv and require "censive"
37
+
38
+ asky = opts[:ascii ] and require "any_ascii"
39
+ filt = opts[:where ] and filt = "where\n #{filt}"
40
+ hide = opts[:suppress]
41
+ natu = opts[:natural ]
42
+ show = opts[:rows ]
43
+ want = opts[:extract].to_s.downcase.split(",")
44
+
45
+ dbas ||= ARGV.shift or abort "no database given"
46
+ tabl ||= ARGV.shift or abort "no table given"
47
+
48
+ [xcsv, xpsv, xtsv].compact.size > 1 and abort "only one of csv, psv, or tsv allowed"
49
+
50
+ # ==[ Helpers ]==
51
+
52
+ class DuckDB::Connection
53
+ alias_method :sql, :query
54
+
55
+ def sql!(stmt, *args, **, &)
56
+ puts "\n==[ SQL statement ]==\n\n", stmt.strip, ";"
57
+ sql(stmt, *args, **, &)
58
+ end
59
+ end
60
+
61
+ def display(name, data, show, uniq, tots)
62
+ seen = data.inject(0) {|seen, coun| seen += coun[0] }
63
+ rows = [data.size, seen].min
64
+ wide = tots.to_s.size
65
+ fill = " " * wide
66
+ line = "=" * name.size
67
+
68
+ puts "\n#{fill} #{name}\n#{fill} #{line}\n"
69
+ data.each {|cnt, val| puts "%*d %s" % [wide, cnt, val || "NULL"] }
70
+ puts "#{fill} -----\n"
71
+ puts "%*d shown (top %d)" % [wide, seen, rows] if show && rows > 1
72
+ puts "%*d total (all %d)" % [wide, tots, uniq] if uniq > 1
73
+ puts "%*d total" % [wide, tots ] unless uniq > 1
74
+ end
75
+
76
+ # ==[ Let 'er rip! ]==
77
+
78
+ conn = DuckDB::Database.open(dbas).connect
79
+ resu = conn.query(<<~end)
80
+ select column_name
81
+ from information_schema.columns
82
+ where table_name='#{tabl}'
83
+ order by ordinal_position
84
+ end
85
+ cols = resu.to_a.flatten
86
+ want = want.empty? ? cols : want & cols
87
+
88
+ if opts[:columns]
89
+ puts cols
90
+ exit
91
+ end
92
+
93
+ if want.empty?
94
+ abort "no columns are selected"
95
+ end
96
+
97
+ # handle exports
98
+ if xprt
99
+ list = want.map {|item| "\"#{item}\"" }.join(", ")
100
+ stmt = show ? "limit #{show}" : ""
101
+ data = conn.sql(<<~"" + stmt).to_a
102
+ select
103
+ #{list}
104
+ from
105
+ "#{tabl}"
106
+ #{filt}
107
+
108
+ seps = xcsv ? "," : xtsv ? "\t" : xpsv ? "|" : abort("unknown separator #{seps.inspect}")
109
+
110
+ Censive.writer(sep: seps) do |csv|
111
+ csv << want unless hide
112
+ data.each do |row|
113
+ csv << row.map {|e| asky ? AnyAscii.transliterate(e.to_s) : e.to_s }
114
+ end
115
+ end
116
+
117
+ exit
118
+ end
119
+
120
+ want.each do |name|
121
+ sort = natu ? "" : "cnt desc,"
122
+ stmt = show ? "limit #{show}" : ""
123
+ data = conn.sql(<<~"" + stmt).to_a
124
+ select
125
+ count(*) as cnt,
126
+ "#{name}" as val
127
+ from
128
+ "#{tabl}"
129
+ #{filt}
130
+ group by
131
+ val
132
+ order by #{sort}
133
+ if(regexp_matches("#{name}", '^[-+]?((0|([1-9]\\d*)(\\.\\d*)?)|((0|([1-9]\\d*))\\.\\d+))$'),cast("#{name}" as double),null) nulls last,
134
+ if(regexp_matches("#{name}", '^0\\d*$'),length("#{name}"),null) nulls last,
135
+ if(regexp_matches("#{name}", '^\\d+\\D'),length(regexp_extract("#{name}",'^(\\d+)',1)),null) nulls last,
136
+ "#{name}" is null, "#{name}"
137
+
138
+ uniq, tots = conn.sql(<<~"").to_a[0]
139
+ select
140
+ count(distinct(ifnull("#{name}",0))),
141
+ count(ifnull("#{name}",0))
142
+ from
143
+ "#{tabl}"
144
+ #{filt}
145
+
146
+ display(name, data, show, uniq, tots)
147
+ end
data/slyce.gemspec CHANGED
@@ -12,6 +12,7 @@ Gem::Specification.new do |s|
12
12
  s.files = `git ls-files`.split("\n") - %w[.gitignore]
13
13
  s.executables = `cd bin && git ls-files .`.split("\n")
14
14
  s.add_runtime_dependency "any_ascii", "~> 0.3.2"
15
+ s.add_runtime_dependency "duckdb", "~> 0.7.1"
15
16
  s.add_runtime_dependency "extralite-bundle", "~> 1.25"
16
17
  s.add_runtime_dependency "mysql2", "~> 0.5"
17
18
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: slyce
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steve Shreeve
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-03-18 00:00:00.000000000 Z
11
+ date: 2023-03-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: any_ascii
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: 0.3.2
27
+ - !ruby/object:Gem::Dependency
28
+ name: duckdb
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.7.1
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.7.1
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: extralite-bundle
29
43
  requirement: !ruby/object:Gem::Requirement
@@ -57,6 +71,7 @@ email: steve.shreeve@gmail.com
57
71
  executables:
58
72
  - slyce
59
73
  - slyce3
74
+ - slyced
60
75
  extensions: []
61
76
  extra_rdoc_files: []
62
77
  files:
@@ -65,6 +80,7 @@ files:
65
80
  - README.md
66
81
  - bin/slyce
67
82
  - bin/slyce3
83
+ - bin/slyced
68
84
  - slyce.gemspec
69
85
  homepage: https://github.com/shreeve/slyce
70
86
  licenses: