slyce 1.0.2 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (7) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +65 -1
  3. data/bin/slyce +43 -7
  4. data/bin/slyce3 +0 -2
  5. data/bin/slyced +147 -0
  6. data/slyce.gemspec +2 -1
  7. metadata +18 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f6b0933ed29b724b0cca2ed1a47ae8f18afd875d01957d8497a1da5e64aea67f
4
- data.tar.gz: 25e01d18ac4d15c989c8cfdb3bce0af15b7d66c176e8d411bd6634e263283d27
3
+ metadata.gz: 88c8c083ec80e6f7e25a3ba665a4bc9ec578b0f7d3939298b4ce97a1a13c430d
4
+ data.tar.gz: 0a4af788b109e4b8f000d304c53d7d1ad2b090545eca213bf9f7901cc40b1188
5
5
  SHA512:
6
- metadata.gz: a9ea1296cd1f3bd8d946e9a389b57f604771dfe960c6c2181128f4460dae0ae854ae8927142649c0d5ad7b45d07da9eef462fb77f2e6131607515540d700027d
7
- data.tar.gz: 91bb4e8c118925556fc218486d1e8d944b90ed216da2d84cf38ba2e956d0fc9a2e87e5fccf7e1fa5f4a446159b91fec7c13f000ae9442f15aa66ad9b9411648c
6
+ metadata.gz: 285327c0dba3c36a4c3da6ff0fa033589094ea8a72b2ffe49fe7370d2c89d137835e2aa60f7e0c40a4449f823e55434166b93fcc14c8bc39d431814a9d38b092
7
+ data.tar.gz: 78737aefd1bca2982594a175c8ee2ae49a5040b12e80382b57a792df3040789400cc747f6d19ab68eaa88c29a2341d2b679b4e895bf622a5b6df1039b5d0b5f6
data/README.md CHANGED
@@ -1,2 +1,66 @@
1
1
  # slyce
2
- Ruby utility to show data statistics for MySQL databases
2
+
3
+ Ruby utility to show summary statistics or export data from MySQL, SQLite, or DuckDB.
4
+
5
+ ## Supported platforms
6
+
7
+ ### MySQL
8
+
9
+ ### SQLite
10
+
11
+ ### DuckDB
12
+
13
+ Ensure the `duckdb` gem is installed via:
14
+
15
+ `gem install duckdb`
16
+
17
+ ## Example
18
+
19
+ The following opens `my-database.duck` and exports the first 10 rows of the
20
+ `members` table whose `last_name` field equals `Miller`, extracting the six fields
21
+ `id,first_name,city,state,zip,medical_plan`. The output is written as CSV to `STDOUT`.
22
+
23
+ ```slyced my-database.duck members -r 10 -w "last_name='Miller'" -xid,first_name,city,state,zip,medical_plan --csv```
24
+
25
+ The following will show the most frequent 5 values from the `temp_table` table
26
+ in the `nearsite` database for these fields: `ef_id,external_id,insurance_plan`.
27
+ In addition, only rows that satisfy the condition given in the `-w` option are counted.
28
+
29
+ ```slyce nearsite temp_table -w "ef_id>=251 and benefit_status='X'" -x ef_id,external_id,insurance_plan -r 5```
30
+
31
+ The resulting table looks like:
32
+
33
+ ```text
34
+ ef_id
35
+ =====
36
+ 781 255
37
+ 35 253
38
+ 31 510
39
+ 19 251
40
+ 5 258
41
+ -----
42
+ 871 shown (top 5)
43
+ 879 total (all 10)
44
+
45
+ external_id
46
+ ===========
47
+ 816 NULL
48
+ 1 1235
49
+ 1 1266
50
+ 1 1320
51
+ 1 1548
52
+ -----
53
+ 820 shown (top 5)
54
+ 879 total (all 64)
55
+
56
+ insurance_plan
57
+ ==============
58
+ 176 Aetna PPO
59
+ 144 Aetna HSA
60
+ 103 NULL
61
+ 82 Kaiser Northern CA
62
+ 73
63
+ -----
64
+ 578 shown (top 5)
65
+ 879 total (all 35)
66
+ ```
data/bin/slyce CHANGED
@@ -1,7 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- VERSION="1.0.2"
4
-
5
3
  STDOUT.sync = true
6
4
 
7
5
  require "mysql2"
@@ -13,27 +11,42 @@ dbas = nil
13
11
  tabl = nil
14
12
 
15
13
  OptionParser.new.instance_eval do
14
+ @version = "1.2.0"
16
15
  @banner = "usage: #{program_name} [options] <database> <table>"
17
16
 
17
+ on "--csv" , "Output comma separated values"
18
+ on "--psv" , "Output pipe separated values"
19
+ on "--tsv" , "Output tab separated values"
20
+ on "-a", "--ascii" , "Convert data to ASCII using AnyAscii"
18
21
  on "-c", "--columns" , "Display column names and quit"
19
22
  on "-h", "--help" , "Show help and command usage" do Kernel.abort to_s; end
20
23
  on "-n", "--natural" , "Sort naturally, not numerically"
21
- on "-s", "--show <count>" , "Show this many values", Integer
22
- on "-v", "--version" , "Show version number" do Kernel.abort "#{program_name} #{VERSION}"; end
24
+ on "-r", "--rows <count>" , "Rows of data to select", Integer
25
+ on "-s", "--suppress" , "Suppress header when exporting delimited files"
26
+ on "-v", "--version" , "Show version number" do Kernel.abort "#{program_name} #{@version}"; end
23
27
  on "-w", "--where <cond>" , "Where clause (eg - 'age>50 and state='AZ')"
24
28
  on "-x", "--extract <col1,col2,...>", "Comma separated list of columns to extract"
25
29
 
26
30
  self
27
31
  end.parse!(into: opts={}) rescue abort($!.message)
28
32
 
29
- filt = opts[:where] and filt = "where\n #{filt}"
30
- natu = opts[:natural]
31
- show = opts[:show]
33
+ xcsv = opts[:csv]
34
+ xpsv = opts[:psv]
35
+ xtsv = opts[:tsv]
36
+ xprt = xcsv || xpsv || xtsv and require "censive"
37
+
38
+ asky = opts[:ascii ] and require "any_ascii"
39
+ filt = opts[:where ] and filt = "where\n #{filt}"
40
+ hide = opts[:suppress]
41
+ natu = opts[:natural ]
42
+ show = opts[:rows ]
32
43
  want = opts[:extract].to_s.downcase.split(",")
33
44
 
34
45
  dbas ||= ARGV.shift or abort "no database given"
35
46
  tabl ||= ARGV.shift or abort "no table given"
36
47
 
48
+ [xcsv, xpsv, xtsv].compact.size > 1 and abort "only one of csv, psv, or tsv allowed"
49
+
37
50
  # ==[ Helpers ]==
38
51
 
39
52
  class Mysql2::Client
@@ -76,6 +89,29 @@ if want.empty?
76
89
  abort "no columns are selected"
77
90
  end
78
91
 
92
+ # handle exports
93
+ if xprt
94
+ list = want.map {|item| "`#{item}`" }.join(", ")
95
+ stmt = show ? "limit #{show}" : ""
96
+ data = conn.sql(<<~"" + stmt).to_a
97
+ select
98
+ #{list}
99
+ from
100
+ `#{tabl}`
101
+ #{filt}
102
+
103
+ seps = xcsv ? "," : xtsv ? "\t" : xpsv ? "|" : abort("unknown separator #{seps.inspect}")
104
+
105
+ Censive.writer(sep: seps) do |csv|
106
+ csv << want unless hide
107
+ data.each do |row|
108
+ csv << row.map {|e| asky ? AnyAscii.transliterate(e.to_s) : e.to_s }
109
+ end
110
+ end
111
+
112
+ exit
113
+ end
114
+
79
115
  want.each do |name|
80
116
  sort = natu ? "" : "cnt desc,"
81
117
  stmt = show ? "limit #{show}" : ""
data/bin/slyce3 CHANGED
@@ -9,8 +9,6 @@
9
9
  # wget https://github.com/nalgeon/sqlean/releases/download/0.19.3/sqlean-macos-arm64.zip
10
10
  # unzip sqlean-macos-arm64.zip regexp.dylib
11
11
 
12
- VERSION="1.0.2"
13
-
14
12
  STDOUT.sync = true
15
13
 
16
14
  require "extralite"
data/bin/slyced ADDED
@@ -0,0 +1,147 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ STDOUT.sync = true
4
+
5
+ require "duckdb"
6
+ require "optparse"
7
+
8
+ trap("INT" ) { abort "\n" }
9
+
10
+ dbas = nil
11
+ tabl = nil
12
+
13
+ OptionParser.new.instance_eval do
14
+ @version = "1.2.0"
15
+ @banner = "usage: #{program_name} [options] <database> <table>"
16
+
17
+ on "--csv" , "Output comma separated values"
18
+ on "--psv" , "Output pipe separated values"
19
+ on "--tsv" , "Output tab separated values"
20
+ on "-a", "--ascii" , "Convert data to ASCII using AnyAscii"
21
+ on "-c", "--columns" , "Display column names and quit"
22
+ on "-h", "--help" , "Show help and command usage" do Kernel.abort to_s; end
23
+ on "-n", "--natural" , "Sort naturally, not numerically"
24
+ on "-r", "--rows <count>" , "Rows of data to select", Integer
25
+ on "-s", "--suppress" , "Suppress header when exporting delimited files"
26
+ on "-v", "--version" , "Show version number" do Kernel.abort "#{program_name} #{@version}"; end
27
+ on "-w", "--where <cond>" , "Where clause (eg - 'age>50 and state='AZ')"
28
+ on "-x", "--extract <col1,col2,...>", "Comma separated list of columns to extract"
29
+
30
+ self
31
+ end.parse!(into: opts={}) rescue abort($!.message)
32
+
33
+ xcsv = opts[:csv]
34
+ xpsv = opts[:psv]
35
+ xtsv = opts[:tsv]
36
+ xprt = xcsv || xpsv || xtsv and require "censive"
37
+
38
+ asky = opts[:ascii ] and require "any_ascii"
39
+ filt = opts[:where ] and filt = "where\n #{filt}"
40
+ hide = opts[:suppress]
41
+ natu = opts[:natural ]
42
+ show = opts[:rows ]
43
+ want = opts[:extract].to_s.downcase.split(",")
44
+
45
+ dbas ||= ARGV.shift or abort "no database given"
46
+ tabl ||= ARGV.shift or abort "no table given"
47
+
48
+ [xcsv, xpsv, xtsv].compact.size > 1 and abort "only one of csv, psv, or tsv allowed"
49
+
50
+ # ==[ Helpers ]==
51
+
52
+ class DuckDB::Connection
53
+ alias_method :sql, :query
54
+
55
+ def sql!(stmt, *args, **, &)
56
+ puts "\n==[ SQL statement ]==\n\n", stmt.strip, ";"
57
+ sql(stmt, *args, **, &)
58
+ end
59
+ end
60
+
61
+ def display(name, data, show, uniq, tots)
62
+ seen = data.inject(0) {|seen, coun| seen += coun[0] }
63
+ rows = [data.size, seen].min
64
+ wide = tots.to_s.size
65
+ fill = " " * wide
66
+ line = "=" * name.size
67
+
68
+ puts "\n#{fill} #{name}\n#{fill} #{line}\n"
69
+ data.each {|cnt, val| puts "%*d %s" % [wide, cnt, val || "NULL"] }
70
+ puts "#{fill} -----\n"
71
+ puts "%*d shown (top %d)" % [wide, seen, rows] if show && rows > 1
72
+ puts "%*d total (all %d)" % [wide, tots, uniq] if uniq > 1
73
+ puts "%*d total" % [wide, tots ] unless uniq > 1
74
+ end
75
+
76
+ # ==[ Let 'er rip! ]==
77
+
78
+ conn = DuckDB::Database.open(dbas).connect
79
+ resu = conn.query(<<~end)
80
+ select column_name
81
+ from information_schema.columns
82
+ where table_name='#{tabl}'
83
+ order by ordinal_position
84
+ end
85
+ cols = resu.to_a.flatten
86
+ want = want.empty? ? cols : want & cols
87
+
88
+ if opts[:columns]
89
+ puts cols
90
+ exit
91
+ end
92
+
93
+ if want.empty?
94
+ abort "no columns are selected"
95
+ end
96
+
97
+ # handle exports
98
+ if xprt
99
+ list = want.map {|item| "\"#{item}\"" }.join(", ")
100
+ stmt = show ? "limit #{show}" : ""
101
+ data = conn.sql(<<~"" + stmt).to_a
102
+ select
103
+ #{list}
104
+ from
105
+ "#{tabl}"
106
+ #{filt}
107
+
108
+ seps = xcsv ? "," : xtsv ? "\t" : xpsv ? "|" : abort("unknown separator #{seps.inspect}")
109
+
110
+ Censive.writer(sep: seps) do |csv|
111
+ csv << want unless hide
112
+ data.each do |row|
113
+ csv << row.map {|e| asky ? AnyAscii.transliterate(e.to_s) : e.to_s }
114
+ end
115
+ end
116
+
117
+ exit
118
+ end
119
+
120
+ want.each do |name|
121
+ sort = natu ? "" : "cnt desc,"
122
+ stmt = show ? "limit #{show}" : ""
123
+ data = conn.sql(<<~"" + stmt).to_a
124
+ select
125
+ count(*) as cnt,
126
+ "#{name}" as val
127
+ from
128
+ "#{tabl}"
129
+ #{filt}
130
+ group by
131
+ val
132
+ order by #{sort}
133
+ if(regexp_matches("#{name}", '^[-+]?((0|([1-9]\\d*)(\\.\\d*)?)|((0|([1-9]\\d*))\\.\\d+))$'),cast("#{name}" as double),null) nulls last,
134
+ if(regexp_matches("#{name}", '^0\\d*$'),length("#{name}"),null) nulls last,
135
+ if(regexp_matches("#{name}", '^\\d+\\D'),length(regexp_extract("#{name}",'^(\\d+)',1)),null) nulls last,
136
+ "#{name}" is null, "#{name}"
137
+
138
+ uniq, tots = conn.sql(<<~"").to_a[0]
139
+ select
140
+ count(distinct(ifnull("#{name}",0))),
141
+ count(ifnull("#{name}",0))
142
+ from
143
+ "#{tabl}"
144
+ #{filt}
145
+
146
+ display(name, data, show, uniq, tots)
147
+ end
data/slyce.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "slyce"
5
- s.version = `grep '^VERSION' bin/slyce | cut -f 2 -d '"'`
5
+ s.version = `grep -m 1 '^\s*@version' bin/slyce | cut -f 2 -d '"'`
6
6
  s.author = "Steve Shreeve"
7
7
  s.email = "steve.shreeve@gmail.com"
8
8
  s.summary =
@@ -11,6 +11,7 @@ Gem::Specification.new do |s|
11
11
  s.license = "MIT"
12
12
  s.files = `git ls-files`.split("\n") - %w[.gitignore]
13
13
  s.executables = `cd bin && git ls-files .`.split("\n")
14
+ s.add_runtime_dependency "any_ascii", "~> 0.3.2"
14
15
  s.add_runtime_dependency "extralite-bundle", "~> 1.25"
15
16
  s.add_runtime_dependency "mysql2", "~> 0.5"
16
17
  end
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: slyce
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steve Shreeve
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-03-13 00:00:00.000000000 Z
11
+ date: 2023-03-20 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: any_ascii
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 0.3.2
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.3.2
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: extralite-bundle
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -43,6 +57,7 @@ email: steve.shreeve@gmail.com
43
57
  executables:
44
58
  - slyce
45
59
  - slyce3
60
+ - slyced
46
61
  extensions: []
47
62
  extra_rdoc_files: []
48
63
  files:
@@ -51,6 +66,7 @@ files:
51
66
  - README.md
52
67
  - bin/slyce
53
68
  - bin/slyce3
69
+ - bin/slyced
54
70
  - slyce.gemspec
55
71
  homepage: https://github.com/shreeve/slyce
56
72
  licenses: