repertoire-faceting 0.5.5 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. checksums.yaml +4 -4
  2. data/FAQ +23 -17
  3. data/INSTALL +52 -84
  4. data/LICENSE +1 -1
  5. data/README +213 -34
  6. data/TODO +20 -7
  7. data/ext/Makefile +24 -14
  8. data/ext/README.faceting +51 -0
  9. data/ext/bytea/bytea.sql +173 -0
  10. data/ext/bytea/faceting_bytea.control +6 -0
  11. data/ext/common/util.sql +35 -0
  12. data/ext/faceting--0.6.0.sql +251 -0
  13. data/ext/faceting_bytea--0.6.0.sql +207 -0
  14. data/ext/faceting_varbit--0.6.0.sql +198 -0
  15. data/ext/signature/faceting.control +6 -0
  16. data/ext/signature/signature.c +740 -0
  17. data/ext/{signature.o → signature/signature.o} +0 -0
  18. data/ext/{signature.so → signature/signature.so} +0 -0
  19. data/ext/signature/signature.sql +217 -0
  20. data/ext/varbit/faceting_varbit.control +7 -0
  21. data/ext/varbit/varbit.sql +164 -0
  22. data/{public → lib/assets}/images/repertoire-faceting/proportional_symbol.png +0 -0
  23. data/{public → lib/assets}/images/repertoire-faceting/spinner_sm.gif +0 -0
  24. data/{public → lib/assets}/javascripts/rep.faceting/context.js +2 -2
  25. data/{public → lib/assets}/javascripts/rep.faceting/ext/earth_facet.js +2 -4
  26. data/{public → lib/assets}/javascripts/rep.faceting/facet.js +1 -1
  27. data/{public → lib/assets}/javascripts/rep.faceting/facet_widget.js +3 -8
  28. data/{public → lib/assets}/javascripts/rep.faceting/nested_facet.js +1 -1
  29. data/{public → lib/assets}/javascripts/rep.faceting/results.js +1 -1
  30. data/{public → lib/assets}/javascripts/rep.faceting.js +5 -1
  31. data/{public → lib/assets}/javascripts/rep.protovis-facets.js +3 -3
  32. data/lib/assets/javascripts/rep.widgets/events.js +51 -0
  33. data/lib/assets/javascripts/rep.widgets/global.js +50 -0
  34. data/lib/assets/javascripts/rep.widgets/model.js +159 -0
  35. data/lib/assets/javascripts/rep.widgets/widget.js +213 -0
  36. data/lib/assets/javascripts/rep.widgets.js +14 -0
  37. data/{public → lib/assets}/stylesheets/rep.faceting.css +1 -1
  38. data/lib/repertoire-faceting/adapters/postgresql_adapter.rb +107 -48
  39. data/lib/repertoire-faceting/facets/abstract_facet.rb +43 -27
  40. data/lib/repertoire-faceting/facets/basic_facet.rb +23 -22
  41. data/lib/repertoire-faceting/facets/nested_facet.rb +50 -27
  42. data/lib/repertoire-faceting/model.rb +101 -65
  43. data/lib/repertoire-faceting/rails/engine.rb +8 -0
  44. data/lib/repertoire-faceting/rails/postgresql_adapter.rb +0 -1
  45. data/lib/repertoire-faceting/rails/relation.rb +0 -1
  46. data/lib/repertoire-faceting/railtie.rb +0 -1
  47. data/lib/repertoire-faceting/relation/calculations.rb +7 -2
  48. data/lib/repertoire-faceting/relation/query_methods.rb +17 -4
  49. data/lib/repertoire-faceting/routing.rb +2 -5
  50. data/lib/repertoire-faceting/tasks/all.rake +5 -4
  51. data/lib/repertoire-faceting/tasks/client.rake +2 -5
  52. data/lib/repertoire-faceting/version.rb +1 -1
  53. data/lib/repertoire-faceting.rb +2 -4
  54. data/{public → vendor/assets}/javascripts/google-earth-extensions.js +0 -0
  55. data/{public → vendor/assets}/javascripts/protovis.js +0 -0
  56. metadata +78 -78
  57. data/ext/README.signature +0 -33
  58. data/ext/signature.c +0 -740
  59. data/ext/signature.sql +0 -342
  60. data/ext/signature.sql.IN +0 -342
  61. data/ext/uninstall_signature.sql +0 -4
  62. data/ext/uninstall_signature.sql.IN +0 -4
  63. data/lib/repertoire-faceting/adapters/abstract_adapter.rb +0 -18
  64. data/lib/repertoire-faceting/relation/spawn_methods.rb +0 -26
@@ -0,0 +1,173 @@
1
+ -- ============================================================================
2
+ -- Faceting API implementing bitmap indices using PostgreSQL's built-in BYTEA
3
+ -- type, processed using plv8 typed arrays.
4
+ --
5
+ -- This API is suitable for deployment on Heroku, where plv8 is installed by
6
+ -- default. Performance is many times better than the VARBIT-based faceting
7
+ -- API, primarily because of optimisations in memory handling in the count
8
+ -- function.
9
+ --
10
+ -- See https://code.google.com/p/plv8js/wiki/PLV8
11
+ -- https://postgres.heroku.com/blog/past/2013/6/5/javascript_in_your_postgres/
12
+ --
13
+ -- Christopher York
14
+ -- MIT Hyperstudio
15
+ -- February 2014
16
+ -- ============================================================================
17
+
18
+ CREATE EXTENSION IF NOT EXISTS plv8;
19
+
20
+ SET bytea_output TO hex;
21
+
22
+ -- these functions are in pl/pgsql, because they involve appending bytea values,
23
+ -- which is easier done with direct access to the || operator
24
+
25
+ CREATE FUNCTION @extschema@.sig_resize( sig BYTEA, bits INT ) RETURNS BYTEA AS $$ -- returns sig zero-padded so it can hold at least "bits" bits; never shortens sig
26
+ DECLARE
27
+ len INT;    -- current length of sig, in bytes
28
+ bytes INT;  -- number of bytes needed to hold "bits" bits
29
+ BEGIN
30
+ bytes := ceil(bits / 8.0)::INT;
31
+ len := length(sig);
32
+ IF bytes > len THEN
33
+ -- grow: append zero bytes up to the required length
34
+ RETURN sig || ('\x' || repeat('00', bytes - len))::BYTEA;
35
+ ELSIF bytes < len THEN
36
+ -- never shrink: sig_set resizes to pos+1 on every call, so truncating here would drop higher bits
37
+ RETURN sig;
38
+ END IF;
39
+ RETURN sig;
40
+ END $$ LANGUAGE plpgsql STRICT IMMUTABLE;
41
+
42
+ CREATE FUNCTION @extschema@.sig_set( sig BYTEA, pos INT, val INT) RETURNS BYTEA AS $$ -- sets bit "pos" of sig to val, growing sig first when pos lies beyond its end
43
+ BEGIN
44
+ RETURN set_bit(@extschema@.sig_resize(sig, pos+1), pos, val); -- schema-qualified so the call resolves whatever the caller's search_path is
45
+ END $$ LANGUAGE plpgsql STRICT IMMUTABLE;
46
+ -- Two-argument overload: sets bit "pos" of sig to 1.
47
+ CREATE FUNCTION @extschema@.sig_set( sig BYTEA, pos INT) RETURNS BYTEA AS $$
48
+ BEGIN
49
+ RETURN @extschema@.sig_set(sig, pos, 1);
50
+ END $$ LANGUAGE plpgsql STRICT IMMUTABLE;
51
+
52
+ -- these functions are in javascript, because (1) pl/pgsql is close
53
+ -- to the worst language in the world; (2) plv8's typed arrays make
54
+ -- the count function much faster
55
+
56
+ CREATE FUNCTION @extschema@.sig_get( sig BYTEA, pos INT ) RETURNS INT AS $$ // returns bit "pos" of sig as 0 or 1
57
+ if (pos >= 0 && pos < sig.length * 8) { // valid positions are 0 .. 8 * sig.length - 1
58
+ return (sig[ Math.floor(pos / 8) ] >> (pos % 8)) & 1; // bit 0 is the lowest bit of element 0
59
+ } else {
60
+ return 0; // positions outside the signature read as unset
61
+ }
62
+ $$ LANGUAGE plv8 STRICT IMMUTABLE;
63
+ -- sig_length: returns sig.length, the element count of the value plv8 passes in for the BYTEA (presumably one element per byte, not per bit -- confirm).
64
+ CREATE FUNCTION @extschema@.sig_length( sig BYTEA ) RETURNS INT AS $$
65
+ return sig.length;
66
+ $$ LANGUAGE plv8 STRICT IMMUTABLE;
67
+
68
+ CREATE FUNCTION @extschema@.sig_and(sig1 BYTEA, sig2 BYTEA) RETURNS BYTEA AS $$
69
+ // The intersection is computed in place on the shorter argument, which is also the result.
70
+ var shorter = (sig2.length < sig1.length) ? sig2 : sig1;
71
+ var longer = (shorter === sig1) ? sig2 : sig1;
72
+ var n = shorter.length;
73
+ for (var k = 0; k < n; k += 1) {
74
+ // elements of the longer signature beyond n never reach the result
75
+ shorter[k] &= longer[k];
76
+ }
77
+ return shorter;
78
+ $$ LANGUAGE plv8 STRICT IMMUTABLE;
79
+
80
+ CREATE FUNCTION @extschema@.sig_or(sig1 BYTEA, sig2 BYTEA) RETURNS BYTEA AS $$
81
+ // The union is accumulated in place into the longer argument, which is also the result.
82
+ var longer = (sig2.length > sig1.length) ? sig2 : sig1;
83
+ var shorter = (longer === sig1) ? sig2 : sig1;
84
+ var n = shorter.length;
85
+ for (var k = 0; k < n; k += 1) {
86
+ // elements of the longer signature beyond n are returned unchanged
87
+ longer[k] |= shorter[k];
88
+ }
89
+ return longer;
90
+ $$ LANGUAGE plv8 STRICT IMMUTABLE;
91
+ -- count: number of set bits in sig. count_table has 256 entries; entry v is the number of 1-bits in v, and the loop sums the entry for every element of sig.
92
+ CREATE FUNCTION @extschema@.count(sig bytea) RETURNS int4 AS $$
93
+ var count_table = [
94
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
95
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
96
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
97
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
98
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
99
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
100
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
101
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
102
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
103
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
104
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
105
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
106
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
107
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
108
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
109
+ 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
110
+ ];
111
+ var count = 0;
112
+ for (var i = 0; i < sig.length; i++) { count += count_table[ sig[i] ]; }
113
+ return count;
114
+ $$ LANGUAGE plv8 STRICT IMMUTABLE;
115
+
116
+ CREATE FUNCTION @extschema@.contains( sig BYTEA, pos INT ) RETURNS BOOL AS $$ // true when bit "pos" of sig is set; positions outside the signature are never members
117
+ return pos >= 0 && pos < sig.length * 8 && ((sig[ Math.floor(pos / 8) ] >> (pos % 8)) & 1) === 1; // always a strict boolean
118
+ $$ LANGUAGE plv8 STRICT IMMUTABLE;
119
+
120
+ CREATE FUNCTION @extschema@.members( sig BYTEA ) RETURNS SETOF INT AS $$
121
+ // Emits the position of every set bit, in ascending order.
122
+ for (var pos = 0, total = sig.length * 8; pos < total; pos += 1) {
123
+ var element = sig[ Math.floor(pos / 8) ]; // element holding this bit
124
+ if ((element >> (pos % 8)) & 1) {
125
+ plv8.return_next(pos);
126
+ }
127
+ }
128
+ $$ LANGUAGE plv8 STRICT IMMUTABLE;
129
+
130
+
131
+ -- operators for faceting
132
+ -- a & b : sig_and of two BYTEA signatures; declared as its own commutator
133
+ CREATE OPERATOR @extschema@.& (
134
+ leftarg = BYTEA,
135
+ rightarg = BYTEA,
136
+ procedure = @extschema@.sig_and,
137
+ commutator = &
138
+ );
139
+ -- a | b : sig_or of two BYTEA signatures; declared as its own commutator
140
+ CREATE OPERATOR @extschema@.| (
141
+ leftarg = BYTEA,
142
+ rightarg = BYTEA,
143
+ procedure = @extschema@.sig_or,
144
+ commutator = |
145
+ );
146
+ -- sig + n : the (BYTEA, int) operand pair selects the two-argument sig_set, which sets bit n to 1
147
+ CREATE OPERATOR @extschema@.+ (
148
+ leftarg = BYTEA,
149
+ rightarg = int,
150
+ procedure = @extschema@.sig_set
151
+ );
152
+
153
+
154
+ -- aggregate functions for faceting
155
+ -- collect(BYTEA): folds signatures together with sig_or. No initcond is given; sig_or is STRICT, so the first non-null input seeds the state.
156
+ CREATE AGGREGATE @extschema@.collect( BYTEA )
157
+ (
158
+ sfunc = @extschema@.sig_or,
159
+ stype = BYTEA
160
+ );
161
+ -- filter(BYTEA): folds signatures together with sig_and; seeded by the first non-null input in the same way.
162
+ CREATE AGGREGATE @extschema@.filter( BYTEA )
163
+ (
164
+ sfunc = @extschema@.sig_and,
165
+ stype = BYTEA
166
+ );
167
+ -- signature(INT): starts from an empty BYTEA and applies the two-argument sig_set to every input value.
168
+ CREATE AGGREGATE @extschema@.signature( INT )
169
+ (
170
+ sfunc = @extschema@.sig_set,
171
+ stype = BYTEA,
172
+ initcond = ''
173
+ );
@@ -0,0 +1,6 @@
1
+ # Faceting PostgreSQL extension module
2
+
3
+ comment = 'API for faceted indexing and queries (based on plv8 + bytea bitmaps)'
4
+ requires = 'plv8, plpgsql'
5
+ default_version = '0.6.0'
6
+ schema = 'facet'
@@ -0,0 +1,35 @@
1
+ -- ============================================================================
2
+ -- These functions are common to all bindings of the Repertoire faceting API
3
+ --
4
+ -- Christopher York
5
+ -- MIT Hyperstudio
6
+ -- February 2014
7
+ -- ============================================================================
8
+
9
+
10
+ -- Aggregator to measure how many bits from a loosely-packed id column would be wasted, if
11
+ -- they were all collected into a bitset signature. Returns a float between 0 (no waste)
12
+ -- and 1.0 (all waste). An example of its use:
13
+ --
14
+ -- SELECT wastage(id) FROM nobelists
15
+ -- =# 0.999999
16
+ --
17
+ -- ALTER TABLE nobelists ADD COLUMN _packed_id SERIAL
18
+ -- SELECT wastage(_packed_id) FROM nobelists
19
+ -- =# 0.015625
20
+ --
21
+ CREATE FUNCTION @extschema@.wastage_proportion(state INT[]) RETURNS double precision AS $$
22
+ SELECT (1.0 - (state[1]::double precision / (COALESCE(state[2], 0.0) + 1.0)))
23
+ $$ LANGUAGE sql; -- final function: state[1] is the count and state[2] the maximum kept by wastage_accum; the result is 1 - count / (max + 1)
24
+ -- Transition function: adds one to the count in state[1] and keeps the running maximum in state[2]. NOTE(review): not STRICT, so a NULL val is still counted -- confirm that is intended.
25
+ CREATE FUNCTION @extschema@.wastage_accum(state INT[], val INT) RETURNS INT[] AS $$
26
+ SELECT ARRAY[ state[1]+1, GREATEST(state[2], val) ];
27
+ $$ LANGUAGE sql;
28
+ -- wastage(INT): starts from state {0,0}, folds every input with wastage_accum, then reports wastage_proportion of the final state.
29
+ CREATE AGGREGATE @extschema@.wastage( INT )
30
+ (
31
+ sfunc = @extschema@.wastage_accum,
32
+ stype = INT[],
33
+ finalfunc = @extschema@.wastage_proportion,
34
+ initcond = '{0,0}'
35
+ );
@@ -0,0 +1,251 @@
1
+ -- ============================================================================
2
+ -- Faceting API implementing bitmap indices using a custom C datatype and
3
+ -- associated functions.
4
+ --
5
+ -- This API is to be preferred in all situations where it is possible to
6
+ -- build and install the datatype (requires superuser access to PostgreSQL)
7
+ --
8
+ -- Christopher York
9
+ -- MIT Hyperstudio
10
+ -- February 2014
11
+ -- ============================================================================
12
+
13
+ -- complain if script is sourced in psql, rather than via CREATE EXTENSION
14
+ \echo Use "CREATE EXTENSION faceting" to load the default faceting API. \quit
15
+
16
+ -- functions for bitmap indices using datatype written in C
17
+
18
+ CREATE TYPE @extschema@.signature; -- shell type, so that sig_in / sig_out below can name "signature" before its full definition
19
+
20
+ -- basic i/o functions for signatures
21
+ -- sig_in: text (cstring) to signature, bound to the C symbol sig_in in signature.so
22
+ CREATE FUNCTION @extschema@.sig_in(cstring)
23
+ RETURNS signature
24
+ AS 'signature.so', 'sig_in'
25
+ LANGUAGE C STRICT;
26
+ -- sig_out: signature to text (cstring), bound to the C symbol sig_out in signature.so
27
+ CREATE FUNCTION @extschema@.sig_out(signature)
28
+ RETURNS cstring
29
+ AS 'signature.so', 'sig_out'
30
+ LANGUAGE C STRICT;
31
+
32
+ -- signature postgresql type
33
+ -- full definition: variable-length value using sig_in / sig_out for i/o and "extended" storage
34
+ CREATE TYPE @extschema@.signature (
35
+ INTERNALLENGTH = VARIABLE,
36
+ INPUT = sig_in,
37
+ OUTPUT = sig_out,
38
+ STORAGE = extended
39
+ );
40
+
41
+ -- functions for signatures
42
+
43
+ CREATE FUNCTION @extschema@.sig_resize( signature, INT ) -- every declaration in this group binds a SQL function to the C symbol named in its AS clause, loaded from signature.so
44
+ RETURNS signature
45
+ AS 'signature.so', 'sig_resize'
46
+ LANGUAGE C STRICT IMMUTABLE;
47
+
48
+ CREATE FUNCTION @extschema@.sig_set( signature, INT, INT )
49
+ RETURNS signature
50
+ AS 'signature.so', 'sig_set'
51
+ LANGUAGE C STRICT IMMUTABLE;
52
+ -- Two-argument overload; bound to the same C symbol 'sig_set' as the three-argument form above.
53
+ CREATE FUNCTION @extschema@.sig_set( signature, INT )
54
+ RETURNS signature
55
+ AS 'signature.so', 'sig_set'
56
+ LANGUAGE C STRICT IMMUTABLE;
57
+
58
+ CREATE FUNCTION @extschema@.sig_get( signature, INT )
59
+ RETURNS INT
60
+ AS 'signature.so', 'sig_get'
61
+ LANGUAGE C STRICT IMMUTABLE;
62
+
63
+ CREATE FUNCTION @extschema@.sig_length( signature )
64
+ RETURNS INT
65
+ AS 'signature.so', 'sig_length'
66
+ LANGUAGE C STRICT IMMUTABLE;
67
+
68
+ CREATE FUNCTION @extschema@.sig_min( signature )
69
+ RETURNS INT
70
+ AS 'signature.so', 'sig_min'
71
+ LANGUAGE C STRICT IMMUTABLE;
72
+
73
+ CREATE FUNCTION @extschema@.sig_and( signature, signature )
74
+ RETURNS signature
75
+ AS 'signature.so', 'sig_and'
76
+ LANGUAGE C STRICT IMMUTABLE;
77
+
78
+ CREATE FUNCTION @extschema@.sig_or( signature, signature )
79
+ RETURNS signature
80
+ AS 'signature.so', 'sig_or'
81
+ LANGUAGE C STRICT IMMUTABLE;
82
+ -- NOTE(review): sig_xor is declared with a single signature argument, unlike sig_and / sig_or -- confirm against signature.c.
83
+ CREATE FUNCTION @extschema@.sig_xor( signature )
84
+ RETURNS signature
85
+ AS 'signature.so', 'sig_xor'
86
+ LANGUAGE C STRICT IMMUTABLE;
87
+
88
+ CREATE FUNCTION @extschema@.count( signature )
89
+ RETURNS INT
90
+ AS 'signature.so', 'count'
91
+ LANGUAGE C STRICT IMMUTABLE;
92
+
93
+ CREATE FUNCTION @extschema@.contains( signature, INT )
94
+ RETURNS BOOL
95
+ AS 'signature.so', 'contains'
96
+ LANGUAGE C STRICT IMMUTABLE;
97
+
98
+ CREATE FUNCTION @extschema@.members( signature )
99
+ RETURNS SETOF INT
100
+ AS 'signature.so', 'members'
101
+ LANGUAGE C STRICT IMMUTABLE;
102
+ -- Comparison functions: the BOOL-returning ones back the <, <=, =, >=, > operators; sig_cmp is btree support FUNCTION 1 of signature_ops.
103
+ CREATE FUNCTION @extschema@.sig_cmp( signature, signature )
104
+ RETURNS INT
105
+ AS 'signature.so', 'sig_cmp'
106
+ LANGUAGE C STRICT IMMUTABLE;
107
+
108
+ CREATE FUNCTION @extschema@.sig_lt( signature, signature )
109
+ RETURNS BOOL
110
+ AS 'signature.so', 'sig_lt'
111
+ LANGUAGE C STRICT IMMUTABLE;
112
+
113
+ CREATE FUNCTION @extschema@.sig_lte( signature, signature )
114
+ RETURNS BOOL
115
+ AS 'signature.so', 'sig_lte'
116
+ LANGUAGE C STRICT IMMUTABLE;
117
+
118
+ CREATE FUNCTION @extschema@.sig_eq( signature, signature )
119
+ RETURNS BOOL
120
+ AS 'signature.so', 'sig_eq'
121
+ LANGUAGE C STRICT IMMUTABLE;
122
+
123
+ CREATE FUNCTION @extschema@.sig_gt( signature, signature )
124
+ RETURNS BOOL
125
+ AS 'signature.so', 'sig_gt'
126
+ LANGUAGE C STRICT IMMUTABLE;
127
+
128
+ CREATE FUNCTION @extschema@.sig_gte( signature, signature )
129
+ RETURNS BOOL
130
+ AS 'signature.so', 'sig_gte'
131
+ LANGUAGE C STRICT IMMUTABLE;
132
+
133
+
134
+ -- operators for signatures
135
+ -- a & b : sig_and of two signatures; declared as its own commutator
136
+ CREATE OPERATOR @extschema@.& (
137
+ leftarg = signature,
138
+ rightarg = signature,
139
+ procedure = @extschema@.sig_and,
140
+ commutator = &
141
+ );
142
+ -- a | b : sig_or of two signatures; declared as its own commutator
143
+ CREATE OPERATOR @extschema@.| (
144
+ leftarg = signature,
145
+ rightarg = signature,
146
+ procedure = @extschema@.sig_or,
147
+ commutator = |
148
+ );
149
+ -- sig + n : the (signature, int) operand pair selects the two-argument sig_set
150
+ CREATE OPERATOR @extschema@.+ (
151
+ leftarg = signature,
152
+ rightarg = int,
153
+ procedure = @extschema@.sig_set
154
+ );
155
+
156
+ CREATE OPERATOR @extschema@.< ( -- less-than: commutes with >, negated by >=
157
+ leftarg = signature, rightarg = signature, procedure = sig_lt,
158
+ commutator = > , negator = >= ,
159
+ restrict = scalarltsel, join = scalarltjoinsel
160
+ );
161
+ -- less-than-or-equal: commutes with >=, negated by >
162
+ CREATE OPERATOR @extschema@.<= (
163
+ leftarg = signature, rightarg = signature, procedure = sig_lte,
164
+ commutator = >= , negator = > ,
165
+ restrict = scalarltsel, join = scalarltjoinsel
166
+ );
167
+ -- equality. NOTE(review): the negator <> is named here, but this script never defines a <> operator for signature, so it stays a shell operator -- confirm whether one should be added.
168
+ CREATE OPERATOR @extschema@.= (
169
+ leftarg = signature, rightarg = signature, procedure = sig_eq,
170
+ commutator = = , negator = <> ,
171
+ restrict = eqsel, join = eqjoinsel
172
+ );
173
+ -- greater-than-or-equal: schema-qualified like every other operator in this script
174
+ CREATE OPERATOR @extschema@.>= (
175
+ leftarg = signature, rightarg = signature, procedure = sig_gte,
176
+ commutator = <= , negator = < ,
177
+ restrict = scalargtsel, join = scalargtjoinsel
178
+ );
179
+ -- greater-than: commutes with <, negated by <=
180
+ CREATE OPERATOR @extschema@.> (
181
+ leftarg = signature, rightarg = signature, procedure = sig_gt,
182
+ commutator = < , negator = <= ,
183
+ restrict = scalargtsel, join = scalargtjoinsel
184
+ );
185
+
186
+ -- index operator classes for signatures
187
+ -- Default btree operator class for signature: strategies 1-5 map to <, <=, =, >=, > and support function 1 is sig_cmp.
188
+ CREATE OPERATOR CLASS @extschema@.signature_ops
189
+ DEFAULT FOR TYPE signature USING btree AS
190
+ OPERATOR 1 < ,
191
+ OPERATOR 2 <= ,
192
+ OPERATOR 3 = ,
193
+ OPERATOR 4 >= ,
194
+ OPERATOR 5 > ,
195
+ FUNCTION 1 sig_cmp(signature, signature);
196
+
197
+
198
+ -- aggregate functions for faceting
199
+ -- collect(signature): folds signatures together with sig_or. No initcond is given; sig_or is STRICT, so the first non-null input seeds the state.
200
+ CREATE AGGREGATE @extschema@.collect( signature )
201
+ (
202
+ sfunc = @extschema@.sig_or,
203
+ stype = signature
204
+ );
205
+ -- filter(signature): folds signatures together with sig_and; seeded by the first non-null input in the same way.
206
+ CREATE AGGREGATE @extschema@.filter( signature )
207
+ (
208
+ sfunc = @extschema@.sig_and,
209
+ stype = signature
210
+ );
211
+ -- signature(INT): applies the two-argument sig_set to every input, starting from the signature that sig_in builds from the text '0' (presumably an empty signature -- confirm).
212
+ CREATE AGGREGATE @extschema@.signature( INT )
213
+ (
214
+ sfunc = @extschema@.sig_set,
215
+ stype = signature,
216
+ initcond = '0'
217
+ );-- ============================================================================
218
+ -- These functions are common to all bindings of the Repertoire faceting API
219
+ --
220
+ -- Christopher York
221
+ -- MIT Hyperstudio
222
+ -- February 2014
223
+ -- ============================================================================
224
+
225
+
226
+ -- Aggregator to measure how many bits from a loosely-packed id column would be wasted, if
227
+ -- they were all collected into a bitset signature. Returns a float between 0 (no waste)
228
+ -- and 1.0 (all waste). An example of its use:
229
+ --
230
+ -- SELECT wastage(id) FROM nobelists
231
+ -- =# 0.999999
232
+ --
233
+ -- ALTER TABLE nobelists ADD COLUMN _packed_id SERIAL
234
+ -- SELECT wastage(_packed_id) FROM nobelists
235
+ -- =# 0.015625
236
+ --
237
+ CREATE FUNCTION @extschema@.wastage_proportion(state INT[]) RETURNS double precision AS $$
238
+ SELECT (1.0 - (state[1]::double precision / (COALESCE(state[2], 0.0) + 1.0)))
239
+ $$ LANGUAGE sql; -- final function: state[1] is the count and state[2] the maximum kept by wastage_accum; the result is 1 - count / (max + 1)
240
+ -- Transition function: adds one to the count in state[1] and keeps the running maximum in state[2]. NOTE(review): not STRICT, so a NULL val is still counted -- confirm that is intended.
241
+ CREATE FUNCTION @extschema@.wastage_accum(state INT[], val INT) RETURNS INT[] AS $$
242
+ SELECT ARRAY[ state[1]+1, GREATEST(state[2], val) ];
243
+ $$ LANGUAGE sql;
244
+ -- wastage(INT): starts from state {0,0}, folds every input with wastage_accum, then reports wastage_proportion of the final state.
245
+ CREATE AGGREGATE @extschema@.wastage( INT )
246
+ (
247
+ sfunc = @extschema@.wastage_accum,
248
+ stype = INT[],
249
+ finalfunc = @extschema@.wastage_proportion,
250
+ initcond = '{0,0}'
251
+ );
@@ -0,0 +1,207 @@
1
+ -- ============================================================================
2
+ -- Faceting API implementing bitmap indices using PostgreSQL's built-in BYTEA
3
+ -- type, processed using plv8 typed arrays.
4
+ --
5
+ -- This API is suitable for deployment on Heroku, where plv8 is installed by
6
+ -- default. Performance is many times better than the VARBIT-based faceting
7
+ -- API, primarily because of optimisations in memory handling in the count
8
+ -- function.
9
+ --
10
+ -- See https://code.google.com/p/plv8js/wiki/PLV8
11
+ -- https://postgres.heroku.com/blog/past/2013/6/5/javascript_in_your_postgres/
12
+ --
13
+ -- Christopher York
14
+ -- MIT Hyperstudio
15
+ -- February 2014
16
+ -- ============================================================================
17
+
18
+ CREATE EXTENSION IF NOT EXISTS plv8;
19
+
20
+ SET bytea_output TO hex;
21
+
22
+ -- these functions are in pl/pgsql, because they involve appending bytea values,
23
+ -- which is easier done with direct access to the || operator
24
+
25
+ CREATE FUNCTION @extschema@.sig_resize( sig BYTEA, bits INT ) RETURNS BYTEA AS $$ -- returns sig zero-padded so it can hold at least "bits" bits; never shortens sig
26
+ DECLARE
27
+ len INT;    -- current length of sig, in bytes
28
+ bytes INT;  -- number of bytes needed to hold "bits" bits
29
+ BEGIN
30
+ bytes := ceil(bits / 8.0)::INT;
31
+ len := length(sig);
32
+ IF bytes > len THEN
33
+ -- grow: append zero bytes up to the required length
34
+ RETURN sig || ('\x' || repeat('00', bytes - len))::BYTEA;
35
+ ELSIF bytes < len THEN
36
+ -- never shrink: sig_set resizes to pos+1 on every call, so truncating here would drop higher bits
37
+ RETURN sig;
38
+ END IF;
39
+ RETURN sig;
40
+ END $$ LANGUAGE plpgsql STRICT IMMUTABLE;
41
+
42
+ CREATE FUNCTION @extschema@.sig_set( sig BYTEA, pos INT, val INT) RETURNS BYTEA AS $$ -- sets bit "pos" of sig to val, growing sig first when pos lies beyond its end
43
+ BEGIN
44
+ RETURN set_bit(@extschema@.sig_resize(sig, pos+1), pos, val); -- schema-qualified so the call resolves whatever the caller's search_path is
45
+ END $$ LANGUAGE plpgsql STRICT IMMUTABLE;
46
+ -- Two-argument overload: sets bit "pos" of sig to 1.
47
+ CREATE FUNCTION @extschema@.sig_set( sig BYTEA, pos INT) RETURNS BYTEA AS $$
48
+ BEGIN
49
+ RETURN @extschema@.sig_set(sig, pos, 1);
50
+ END $$ LANGUAGE plpgsql STRICT IMMUTABLE;
51
+
52
+ -- these functions are in javascript, because (1) pl/pgsql is close
53
+ -- to the worst language in the world; (2) plv8's typed arrays make
54
+ -- the count function much faster
55
+
56
+ CREATE FUNCTION @extschema@.sig_get( sig BYTEA, pos INT ) RETURNS INT AS $$ // returns bit "pos" of sig as 0 or 1
57
+ if (pos >= 0 && pos < sig.length * 8) { // valid positions are 0 .. 8 * sig.length - 1
58
+ return (sig[ Math.floor(pos / 8) ] >> (pos % 8)) & 1; // bit 0 is the lowest bit of element 0
59
+ } else {
60
+ return 0; // positions outside the signature read as unset
61
+ }
62
+ $$ LANGUAGE plv8 STRICT IMMUTABLE;
63
+ -- sig_length: returns sig.length, the element count of the value plv8 passes in for the BYTEA (presumably one element per byte, not per bit -- confirm).
64
+ CREATE FUNCTION @extschema@.sig_length( sig BYTEA ) RETURNS INT AS $$
65
+ return sig.length;
66
+ $$ LANGUAGE plv8 STRICT IMMUTABLE;
67
+
68
+ CREATE FUNCTION @extschema@.sig_and(sig1 BYTEA, sig2 BYTEA) RETURNS BYTEA AS $$
69
+ // The intersection is computed in place on the shorter argument, which is also the result.
70
+ var shorter = (sig2.length < sig1.length) ? sig2 : sig1;
71
+ var longer = (shorter === sig1) ? sig2 : sig1;
72
+ var n = shorter.length;
73
+ for (var k = 0; k < n; k += 1) {
74
+ // elements of the longer signature beyond n never reach the result
75
+ shorter[k] &= longer[k];
76
+ }
77
+ return shorter;
78
+ $$ LANGUAGE plv8 STRICT IMMUTABLE;
79
+
80
+ CREATE FUNCTION @extschema@.sig_or(sig1 BYTEA, sig2 BYTEA) RETURNS BYTEA AS $$
81
+ // The union is accumulated in place into the longer argument, which is also the result.
82
+ var longer = (sig2.length > sig1.length) ? sig2 : sig1;
83
+ var shorter = (longer === sig1) ? sig2 : sig1;
84
+ var n = shorter.length;
85
+ for (var k = 0; k < n; k += 1) {
86
+ // elements of the longer signature beyond n are returned unchanged
87
+ longer[k] |= shorter[k];
88
+ }
89
+ return longer;
90
+ $$ LANGUAGE plv8 STRICT IMMUTABLE;
91
+ -- count: number of set bits in sig. count_table has 256 entries; entry v is the number of 1-bits in v, and the loop sums the entry for every element of sig.
92
+ CREATE FUNCTION @extschema@.count(sig bytea) RETURNS int4 AS $$
93
+ var count_table = [
94
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
95
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
96
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
97
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
98
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
99
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
100
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
101
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
102
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
103
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
104
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
105
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
106
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
107
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
108
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
109
+ 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
110
+ ];
111
+ var count = 0;
112
+ for (var i = 0; i < sig.length; i++) { count += count_table[ sig[i] ]; }
113
+ return count;
114
+ $$ LANGUAGE plv8 STRICT IMMUTABLE;
115
+
116
+ CREATE FUNCTION @extschema@.contains( sig BYTEA, pos INT ) RETURNS BOOL AS $$ // true when bit "pos" of sig is set; positions outside the signature are never members
117
+ return pos >= 0 && pos < sig.length * 8 && ((sig[ Math.floor(pos / 8) ] >> (pos % 8)) & 1) === 1; // always a strict boolean
118
+ $$ LANGUAGE plv8 STRICT IMMUTABLE;
119
+
120
+ CREATE FUNCTION @extschema@.members( sig BYTEA ) RETURNS SETOF INT AS $$
121
+ // Emits the position of every set bit, in ascending order.
122
+ for (var pos = 0, total = sig.length * 8; pos < total; pos += 1) {
123
+ var element = sig[ Math.floor(pos / 8) ]; // element holding this bit
124
+ if ((element >> (pos % 8)) & 1) {
125
+ plv8.return_next(pos);
126
+ }
127
+ }
128
+ $$ LANGUAGE plv8 STRICT IMMUTABLE;
129
+
130
+
131
+ -- operators for faceting
132
+ -- a & b : sig_and of two BYTEA signatures; declared as its own commutator
133
+ CREATE OPERATOR @extschema@.& (
134
+ leftarg = BYTEA,
135
+ rightarg = BYTEA,
136
+ procedure = @extschema@.sig_and,
137
+ commutator = &
138
+ );
139
+ -- a | b : sig_or of two BYTEA signatures; declared as its own commutator
140
+ CREATE OPERATOR @extschema@.| (
141
+ leftarg = BYTEA,
142
+ rightarg = BYTEA,
143
+ procedure = @extschema@.sig_or,
144
+ commutator = |
145
+ );
146
+ -- sig + n : the (BYTEA, int) operand pair selects the two-argument sig_set, which sets bit n to 1
147
+ CREATE OPERATOR @extschema@.+ (
148
+ leftarg = BYTEA,
149
+ rightarg = int,
150
+ procedure = @extschema@.sig_set
151
+ );
152
+
153
+
154
+ -- aggregate functions for faceting
155
+ -- collect(BYTEA): folds signatures together with sig_or. No initcond is given; sig_or is STRICT, so the first non-null input seeds the state.
156
+ CREATE AGGREGATE @extschema@.collect( BYTEA )
157
+ (
158
+ sfunc = @extschema@.sig_or,
159
+ stype = BYTEA
160
+ );
161
+ -- filter(BYTEA): folds signatures together with sig_and; seeded by the first non-null input in the same way.
162
+ CREATE AGGREGATE @extschema@.filter( BYTEA )
163
+ (
164
+ sfunc = @extschema@.sig_and,
165
+ stype = BYTEA
166
+ );
167
+ -- signature(INT): starts from an empty BYTEA and applies the two-argument sig_set to every input value.
168
+ CREATE AGGREGATE @extschema@.signature( INT )
169
+ (
170
+ sfunc = @extschema@.sig_set,
171
+ stype = BYTEA,
172
+ initcond = ''
173
+ );-- ============================================================================
174
+ -- These functions are common to all bindings of the Repertoire faceting API
175
+ --
176
+ -- Christopher York
177
+ -- MIT Hyperstudio
178
+ -- February 2014
179
+ -- ============================================================================
180
+
181
+
182
+ -- Aggregator to measure how many bits from a loosely-packed id column would be wasted, if
183
+ -- they were all collected into a bitset signature. Returns a float between 0 (no waste)
184
+ -- and 1.0 (all waste). An example of its use:
185
+ --
186
+ -- SELECT wastage(id) FROM nobelists
187
+ -- =# 0.999999
188
+ --
189
+ -- ALTER TABLE nobelists ADD COLUMN _packed_id SERIAL
190
+ -- SELECT wastage(_packed_id) FROM nobelists
191
+ -- =# 0.015625
192
+ --
193
+ CREATE FUNCTION @extschema@.wastage_proportion(state INT[]) RETURNS double precision AS $$
194
+ SELECT (1.0 - (state[1]::double precision / (COALESCE(state[2], 0.0) + 1.0)))
195
+ $$ LANGUAGE sql; -- final function: state[1] is the count and state[2] the maximum kept by wastage_accum; the result is 1 - count / (max + 1)
196
+ -- Transition function: adds one to the count in state[1] and keeps the running maximum in state[2]. NOTE(review): not STRICT, so a NULL val is still counted -- confirm that is intended.
197
+ CREATE FUNCTION @extschema@.wastage_accum(state INT[], val INT) RETURNS INT[] AS $$
198
+ SELECT ARRAY[ state[1]+1, GREATEST(state[2], val) ];
199
+ $$ LANGUAGE sql;
200
+ -- wastage(INT): starts from state {0,0}, folds every input with wastage_accum, then reports wastage_proportion of the final state.
201
+ CREATE AGGREGATE @extschema@.wastage( INT )
202
+ (
203
+ sfunc = @extschema@.wastage_accum,
204
+ stype = INT[],
205
+ finalfunc = @extschema@.wastage_proportion,
206
+ initcond = '{0,0}'
207
+ );