repertoire-faceting 0.5.5 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/FAQ +23 -17
- data/INSTALL +52 -84
- data/LICENSE +1 -1
- data/README +213 -34
- data/TODO +20 -7
- data/ext/Makefile +24 -14
- data/ext/README.faceting +51 -0
- data/ext/bytea/bytea.sql +173 -0
- data/ext/bytea/faceting_bytea.control +6 -0
- data/ext/common/util.sql +35 -0
- data/ext/faceting--0.6.0.sql +251 -0
- data/ext/faceting_bytea--0.6.0.sql +207 -0
- data/ext/faceting_varbit--0.6.0.sql +198 -0
- data/ext/signature/faceting.control +6 -0
- data/ext/signature/signature.c +740 -0
- data/ext/{signature.o → signature/signature.o} +0 -0
- data/ext/{signature.so → signature/signature.so} +0 -0
- data/ext/signature/signature.sql +217 -0
- data/ext/varbit/faceting_varbit.control +7 -0
- data/ext/varbit/varbit.sql +164 -0
- data/{public → lib/assets}/images/repertoire-faceting/proportional_symbol.png +0 -0
- data/{public → lib/assets}/images/repertoire-faceting/spinner_sm.gif +0 -0
- data/{public → lib/assets}/javascripts/rep.faceting/context.js +2 -2
- data/{public → lib/assets}/javascripts/rep.faceting/ext/earth_facet.js +2 -4
- data/{public → lib/assets}/javascripts/rep.faceting/facet.js +1 -1
- data/{public → lib/assets}/javascripts/rep.faceting/facet_widget.js +3 -8
- data/{public → lib/assets}/javascripts/rep.faceting/nested_facet.js +1 -1
- data/{public → lib/assets}/javascripts/rep.faceting/results.js +1 -1
- data/{public → lib/assets}/javascripts/rep.faceting.js +5 -1
- data/{public → lib/assets}/javascripts/rep.protovis-facets.js +3 -3
- data/lib/assets/javascripts/rep.widgets/events.js +51 -0
- data/lib/assets/javascripts/rep.widgets/global.js +50 -0
- data/lib/assets/javascripts/rep.widgets/model.js +159 -0
- data/lib/assets/javascripts/rep.widgets/widget.js +213 -0
- data/lib/assets/javascripts/rep.widgets.js +14 -0
- data/{public → lib/assets}/stylesheets/rep.faceting.css +1 -1
- data/lib/repertoire-faceting/adapters/postgresql_adapter.rb +107 -48
- data/lib/repertoire-faceting/facets/abstract_facet.rb +43 -27
- data/lib/repertoire-faceting/facets/basic_facet.rb +23 -22
- data/lib/repertoire-faceting/facets/nested_facet.rb +50 -27
- data/lib/repertoire-faceting/model.rb +101 -65
- data/lib/repertoire-faceting/rails/engine.rb +8 -0
- data/lib/repertoire-faceting/rails/postgresql_adapter.rb +0 -1
- data/lib/repertoire-faceting/rails/relation.rb +0 -1
- data/lib/repertoire-faceting/railtie.rb +0 -1
- data/lib/repertoire-faceting/relation/calculations.rb +7 -2
- data/lib/repertoire-faceting/relation/query_methods.rb +17 -4
- data/lib/repertoire-faceting/routing.rb +2 -5
- data/lib/repertoire-faceting/tasks/all.rake +5 -4
- data/lib/repertoire-faceting/tasks/client.rake +2 -5
- data/lib/repertoire-faceting/version.rb +1 -1
- data/lib/repertoire-faceting.rb +2 -4
- data/{public → vendor/assets}/javascripts/google-earth-extensions.js +0 -0
- data/{public → vendor/assets}/javascripts/protovis.js +0 -0
- metadata +78 -78
- data/ext/README.signature +0 -33
- data/ext/signature.c +0 -740
- data/ext/signature.sql +0 -342
- data/ext/signature.sql.IN +0 -342
- data/ext/uninstall_signature.sql +0 -4
- data/ext/uninstall_signature.sql.IN +0 -4
- data/lib/repertoire-faceting/adapters/abstract_adapter.rb +0 -18
- data/lib/repertoire-faceting/relation/spawn_methods.rb +0 -26
data/ext/signature.sql
DELETED
@@ -1,342 +0,0 @@
|
|
1
|
-
SET search_path TO 'public';
|
2
|
-
|
3
|
-
CREATE TYPE signature;
|
4
|
-
|
5
|
-
-- basic i/o functions for signatures
|
6
|
-
|
7
|
-
CREATE OR REPLACE FUNCTION sig_in(cstring)
|
8
|
-
RETURNS signature
|
9
|
-
AS 'signature.so', 'sig_in'
|
10
|
-
LANGUAGE C STRICT;
|
11
|
-
|
12
|
-
CREATE OR REPLACE FUNCTION sig_out(signature)
|
13
|
-
RETURNS cstring
|
14
|
-
AS 'signature.so', 'sig_out'
|
15
|
-
LANGUAGE C STRICT;
|
16
|
-
|
17
|
-
-- signature postgresql type
|
18
|
-
|
19
|
-
CREATE TYPE signature (
|
20
|
-
INTERNALLENGTH = VARIABLE,
|
21
|
-
INPUT = sig_in,
|
22
|
-
OUTPUT = sig_out,
|
23
|
-
STORAGE = extended
|
24
|
-
);
|
25
|
-
|
26
|
-
-- functions for signatures
|
27
|
-
|
28
|
-
CREATE OR REPLACE FUNCTION sig_resize( signature, INT )
|
29
|
-
RETURNS signature
|
30
|
-
AS 'signature.so', 'sig_resize'
|
31
|
-
LANGUAGE C STRICT IMMUTABLE;
|
32
|
-
|
33
|
-
CREATE OR REPLACE FUNCTION sig_set( signature, INT, INT )
|
34
|
-
RETURNS signature
|
35
|
-
AS 'signature.so', 'sig_set'
|
36
|
-
LANGUAGE C STRICT IMMUTABLE;
|
37
|
-
|
38
|
-
CREATE OR REPLACE FUNCTION sig_set( signature, INT )
|
39
|
-
RETURNS signature
|
40
|
-
AS 'signature.so', 'sig_set'
|
41
|
-
LANGUAGE C STRICT IMMUTABLE;
|
42
|
-
|
43
|
-
CREATE OR REPLACE FUNCTION sig_get( signature, INT )
|
44
|
-
RETURNS INT
|
45
|
-
AS 'signature.so', 'sig_get'
|
46
|
-
LANGUAGE C STRICT IMMUTABLE;
|
47
|
-
|
48
|
-
CREATE OR REPLACE FUNCTION sig_length( signature )
|
49
|
-
RETURNS INT
|
50
|
-
AS 'signature.so', 'sig_length'
|
51
|
-
LANGUAGE C STRICT IMMUTABLE;
|
52
|
-
|
53
|
-
CREATE OR REPLACE FUNCTION sig_min( signature )
|
54
|
-
RETURNS INT
|
55
|
-
AS 'signature.so', 'sig_min'
|
56
|
-
LANGUAGE C STRICT IMMUTABLE;
|
57
|
-
|
58
|
-
CREATE OR REPLACE FUNCTION sig_and( signature, signature )
|
59
|
-
RETURNS signature
|
60
|
-
AS 'signature.so', 'sig_and'
|
61
|
-
LANGUAGE C STRICT IMMUTABLE;
|
62
|
-
|
63
|
-
CREATE OR REPLACE FUNCTION sig_or( signature, signature )
|
64
|
-
RETURNS signature
|
65
|
-
AS 'signature.so', 'sig_or'
|
66
|
-
LANGUAGE C STRICT IMMUTABLE;
|
67
|
-
|
68
|
-
CREATE OR REPLACE FUNCTION sig_xor( signature )
|
69
|
-
RETURNS signature
|
70
|
-
AS 'signature.so', 'sig_xor'
|
71
|
-
LANGUAGE C STRICT IMMUTABLE;
|
72
|
-
|
73
|
-
CREATE OR REPLACE FUNCTION count( signature )
|
74
|
-
RETURNS INT
|
75
|
-
AS 'signature.so', 'count'
|
76
|
-
LANGUAGE C STRICT IMMUTABLE;
|
77
|
-
|
78
|
-
CREATE OR REPLACE FUNCTION contains( signature, INT )
|
79
|
-
RETURNS BOOL
|
80
|
-
AS 'signature.so', 'contains'
|
81
|
-
LANGUAGE C STRICT IMMUTABLE;
|
82
|
-
|
83
|
-
CREATE OR REPLACE FUNCTION members( signature )
|
84
|
-
RETURNS SETOF INT
|
85
|
-
AS 'signature.so', 'members'
|
86
|
-
LANGUAGE C STRICT IMMUTABLE;
|
87
|
-
|
88
|
-
CREATE OR REPLACE FUNCTION sig_cmp( signature, signature )
|
89
|
-
RETURNS INT
|
90
|
-
AS 'signature.so', 'sig_cmp'
|
91
|
-
LANGUAGE C STRICT IMMUTABLE;
|
92
|
-
|
93
|
-
CREATE OR REPLACE FUNCTION sig_lt( signature, signature )
|
94
|
-
RETURNS BOOL
|
95
|
-
AS 'signature.so', 'sig_lt'
|
96
|
-
LANGUAGE C STRICT IMMUTABLE;
|
97
|
-
|
98
|
-
CREATE OR REPLACE FUNCTION sig_lte( signature, signature )
|
99
|
-
RETURNS BOOL
|
100
|
-
AS 'signature.so', 'sig_lte'
|
101
|
-
LANGUAGE C STRICT IMMUTABLE;
|
102
|
-
|
103
|
-
CREATE OR REPLACE FUNCTION sig_eq( signature, signature )
|
104
|
-
RETURNS BOOL
|
105
|
-
AS 'signature.so', 'sig_eq'
|
106
|
-
LANGUAGE C STRICT IMMUTABLE;
|
107
|
-
|
108
|
-
CREATE OR REPLACE FUNCTION sig_gt( signature, signature )
|
109
|
-
RETURNS BOOL
|
110
|
-
AS 'signature.so', 'sig_gt'
|
111
|
-
LANGUAGE C STRICT IMMUTABLE;
|
112
|
-
|
113
|
-
CREATE OR REPLACE FUNCTION sig_gte( signature, signature )
|
114
|
-
RETURNS BOOL
|
115
|
-
AS 'signature.so', 'sig_gte'
|
116
|
-
LANGUAGE C STRICT IMMUTABLE;
|
117
|
-
|
118
|
-
-- operators for signatures
|
119
|
-
|
120
|
-
CREATE OPERATOR & (
|
121
|
-
leftarg = signature,
|
122
|
-
rightarg = signature,
|
123
|
-
procedure = sig_and,
|
124
|
-
commutator = &
|
125
|
-
);
|
126
|
-
|
127
|
-
CREATE OPERATOR | (
|
128
|
-
leftarg = signature,
|
129
|
-
rightarg = signature,
|
130
|
-
procedure = sig_or,
|
131
|
-
commutator = |
|
132
|
-
);
|
133
|
-
|
134
|
-
CREATE OPERATOR + (
|
135
|
-
leftarg = signature,
|
136
|
-
rightarg = int,
|
137
|
-
procedure = sig_set
|
138
|
-
);
|
139
|
-
|
140
|
-
CREATE OPERATOR < (
|
141
|
-
leftarg = signature, rightarg = signature, procedure = sig_lt,
|
142
|
-
commutator = > , negator = >= ,
|
143
|
-
restrict = scalarltsel, join = scalarltjoinsel
|
144
|
-
);
|
145
|
-
|
146
|
-
CREATE OPERATOR <= (
|
147
|
-
leftarg = signature, rightarg = signature, procedure = sig_lte,
|
148
|
-
commutator = >= , negator = > ,
|
149
|
-
restrict = scalarltsel, join = scalarltjoinsel
|
150
|
-
);
|
151
|
-
|
152
|
-
CREATE OPERATOR = (
|
153
|
-
leftarg = signature, rightarg = signature, procedure = sig_eq,
|
154
|
-
commutator = = , negator = <> ,
|
155
|
-
restrict = eqsel, join = eqjoinsel
|
156
|
-
);
|
157
|
-
|
158
|
-
CREATE OPERATOR >= (
|
159
|
-
leftarg = signature, rightarg = signature, procedure = sig_gte,
|
160
|
-
commutator = <= , negator = < ,
|
161
|
-
restrict = scalargtsel, join = scalargtjoinsel
|
162
|
-
);
|
163
|
-
|
164
|
-
CREATE OPERATOR > (
|
165
|
-
leftarg = signature, rightarg = signature, procedure = sig_gt,
|
166
|
-
commutator = < , negator = <= ,
|
167
|
-
restrict = scalargtsel, join = scalargtjoinsel
|
168
|
-
);
|
169
|
-
|
170
|
-
-- index operator classes for signatures
|
171
|
-
|
172
|
-
CREATE OPERATOR CLASS signature_ops
|
173
|
-
DEFAULT FOR TYPE signature USING btree AS
|
174
|
-
OPERATOR 1 < ,
|
175
|
-
OPERATOR 2 <= ,
|
176
|
-
OPERATOR 3 = ,
|
177
|
-
OPERATOR 4 >= ,
|
178
|
-
OPERATOR 5 > ,
|
179
|
-
FUNCTION 1 sig_cmp(signature, signature);
|
180
|
-
|
181
|
-
-- aggregate functions for faceting
|
182
|
-
|
183
|
-
CREATE AGGREGATE signature( INT )
|
184
|
-
(
|
185
|
-
sfunc = sig_set,
|
186
|
-
stype = signature,
|
187
|
-
initcond = '0'
|
188
|
-
);
|
189
|
-
|
190
|
-
CREATE AGGREGATE collect( signature )
|
191
|
-
(
|
192
|
-
sfunc = sig_or,
|
193
|
-
stype = signature
|
194
|
-
);
|
195
|
-
|
196
|
-
CREATE AGGREGATE filter( signature )
|
197
|
-
(
|
198
|
-
sfunc = sig_and,
|
199
|
-
stype = signature
|
200
|
-
);
|
201
|
-
|
202
|
-
-- utility functions for maintaining facet indices
|
203
|
-
|
204
|
-
-- Utility function to drop and recreate a table, given an sql select statement
|
205
|
-
--
|
206
|
-
CREATE OR REPLACE FUNCTION recreate_table(tbl TEXT, select_expr TEXT) RETURNS VOID AS $$
|
207
|
-
BEGIN
|
208
|
-
SET client_min_messages = warning;
|
209
|
-
EXECUTE 'DROP TABLE IF EXISTS ' || quote_ident(tbl);
|
210
|
-
EXECUTE 'CREATE TABLE ' || quote_ident(tbl) || ' AS ' || select_expr;
|
211
|
-
RESET client_min_messages;
|
212
|
-
END;
|
213
|
-
$$ LANGUAGE plpgsql;
|
214
|
-
|
215
|
-
|
216
|
-
-- Utility function to add or update a packed id column on a table
|
217
|
-
--
|
218
|
-
-- If provided, the threshold indicates a percentage of acceptable wastage or "scatter"
|
219
|
-
-- in the ids, which keeps the packing algorithm from running until absolutely needed.
|
220
|
-
--
|
221
|
-
-- Because ids only become scattered when model rows are deleted, this means repacking
|
222
|
-
-- will occur very infrequently. The default threshold is 15%.
|
223
|
-
--
|
224
|
-
CREATE OR REPLACE FUNCTION renumber_table(tbl TEXT, col TEXT) RETURNS BOOLEAN AS $$
|
225
|
-
BEGIN
|
226
|
-
RETURN renumber_table(tbl, col, 0.15);
|
227
|
-
END;
|
228
|
-
$$ LANGUAGE plpgsql;
|
229
|
-
|
230
|
-
|
231
|
-
CREATE OR REPLACE FUNCTION renumber_table(tbl TEXT, col TEXT, threshold REAL) RETURNS BOOLEAN AS $$
|
232
|
-
DECLARE
|
233
|
-
seq TEXT;
|
234
|
-
wastage REAL;
|
235
|
-
renumber BOOLEAN;
|
236
|
-
BEGIN
|
237
|
-
seq = tbl || '_' || col || '_seq';
|
238
|
-
|
239
|
-
-- Drop numbered column if it already exists
|
240
|
-
SET client_min_messages = 'WARNING';
|
241
|
-
BEGIN
|
242
|
-
IF signature_wastage(tbl, col) <= threshold THEN
|
243
|
-
renumber := false;
|
244
|
-
ELSE
|
245
|
-
renumber := true;
|
246
|
-
EXECUTE 'DROP INDEX IF EXISTS ' || quote_ident(tbl || '_' || col || '_ndx');
|
247
|
-
EXECUTE 'ALTER TABLE ' || quote_ident(tbl) || ' DROP COLUMN ' || quote_ident(col);
|
248
|
-
EXECUTE 'DROP SEQUENCE IF EXISTS ' || quote_ident(seq);
|
249
|
-
END IF;
|
250
|
-
EXCEPTION
|
251
|
-
WHEN undefined_column THEN renumber := true;
|
252
|
-
END;
|
253
|
-
RESET client_min_messages;
|
254
|
-
|
255
|
-
-- Create numbered column & its index
|
256
|
-
IF renumber THEN
|
257
|
-
EXECUTE 'CREATE SEQUENCE ' || quote_ident(seq) || ' MINVALUE 0 ';
|
258
|
-
EXECUTE 'ALTER TABLE ' || quote_ident(tbl) || ' ADD COLUMN ' || quote_ident(col) || ' INT4 DEFAULT nextval(''' || quote_ident(seq) || ''')';
|
259
|
-
EXECUTE 'ALTER SEQUENCE ' || quote_ident(seq) || ' OWNED BY ' || quote_ident(tbl) || '.' || quote_ident(col);
|
260
|
-
EXECUTE 'CREATE INDEX ' || quote_ident(tbl || '_' || col || '_ndx') || ' ON ' || quote_ident(tbl) || '(' || col || ')';
|
261
|
-
END IF;
|
262
|
-
|
263
|
-
RETURN renumber;
|
264
|
-
END;
|
265
|
-
$$ LANGUAGE plpgsql;
|
266
|
-
|
267
|
-
|
268
|
-
-- Utility function to measure how many bits from a loosely-packed id column would be wasted,
|
269
|
-
-- if they were all collected into a bitset signature. Returns a float between 0 (no waste)
|
270
|
-
-- and 1.0 (all waste).
|
271
|
-
--
|
272
|
-
CREATE OR REPLACE FUNCTION signature_wastage(tbl TEXT, col TEXT) RETURNS REAL AS $$
|
273
|
-
DECLARE
|
274
|
-
max REAL;
|
275
|
-
count REAL;
|
276
|
-
BEGIN
|
277
|
-
EXECUTE 'SELECT count(*) FROM ' || quote_ident(tbl)
|
278
|
-
INTO count;
|
279
|
-
EXECUTE 'SELECT max(' || quote_ident(col) || ') FROM ' || quote_ident(tbl)
|
280
|
-
INTO max;
|
281
|
-
RETURN 1.0 - (count / (COALESCE(max, 0) + 1));
|
282
|
-
END;
|
283
|
-
$$ LANGUAGE plpgsql;
|
284
|
-
|
285
|
-
|
286
|
-
-- Utility function to identify columns for a nested facet index
|
287
|
-
--
|
288
|
-
CREATE OR REPLACE FUNCTION nest_levels(tbl TEXT) RETURNS SETOF TEXT AS $$
|
289
|
-
SELECT quote_ident(a.attname::TEXT)
|
290
|
-
FROM pg_attribute a LEFT JOIN pg_attrdef d ON a.attrelid = d.adrelid AND a.attnum = d.adnum
|
291
|
-
WHERE a.attrelid = $1::regclass
|
292
|
-
AND NOT a.attname IN ('signature', 'level')
|
293
|
-
AND a.attnum > 0 AND NOT a.attisdropped
|
294
|
-
ORDER BY a.attnum;
|
295
|
-
$$ LANGUAGE sql;
|
296
|
-
|
297
|
-
|
298
|
-
--Utility function to expand nesting in facet indices
|
299
|
-
--
|
300
|
-
-- Initially a facet index will include only leaves of the
|
301
|
-
-- nesting tree. This function adds all interior nodes
|
302
|
-
-- with their respective aggregate signatures, and adds a
|
303
|
-
-- postgresql index to the nested facet value.
|
304
|
-
--
|
305
|
-
-- e.g. given the nested facet values
|
306
|
-
-- {USA,Florida} '10'
|
307
|
-
-- {USA,Iowa} '01'
|
308
|
-
--
|
309
|
-
-- the function interpolates
|
310
|
-
-- {USA} '11'
|
311
|
-
--
|
312
|
-
-- N.B. expand_nesting may only be called once on a table
|
313
|
-
-- it refuses to add internal node duplicates
|
314
|
-
--
|
315
|
-
CREATE OR REPLACE FUNCTION expand_nesting(tbl TEXT) RETURNS VOID AS $$
|
316
|
-
DECLARE
|
317
|
-
cols TEXT[];
|
318
|
-
len INT;
|
319
|
-
aggr TEXT;
|
320
|
-
BEGIN
|
321
|
-
-- determine column names
|
322
|
-
SELECT array_agg(col) INTO cols FROM nest_levels(tbl) AS col;
|
323
|
-
len := array_length(cols, 1);
|
324
|
-
|
325
|
-
-- add unique index on facet value columns
|
326
|
-
aggr := array_to_string(cols, ', ');
|
327
|
-
EXECUTE 'CREATE UNIQUE INDEX ' || quote_ident(tbl) || '_ndx ON ' || quote_ident(tbl) || '(' || aggr || ')';
|
328
|
-
|
329
|
-
-- expand each level in turn
|
330
|
-
FOR i IN REVERSE (len-1)..1 LOOP
|
331
|
-
aggr := array_to_string(cols[1:i], ', ');
|
332
|
-
EXECUTE 'INSERT INTO ' || quote_ident(tbl) || '(' || aggr || ', level, signature)'
|
333
|
-
|| ' SELECT ' || aggr || ', ' || i || ' AS level, collect(signature)'
|
334
|
-
|| ' FROM ' || quote_ident(tbl)
|
335
|
-
|| ' GROUP BY ' || aggr;
|
336
|
-
END LOOP;
|
337
|
-
|
338
|
-
-- root node
|
339
|
-
EXECUTE 'INSERT INTO ' || quote_ident(tbl) || '(level, signature)'
|
340
|
-
|| ' SELECT 0 AS level, collect(signature) FROM ' || quote_ident(tbl);
|
341
|
-
END;
|
342
|
-
$$ LANGUAGE plpgsql;
|
data/ext/signature.sql.IN
DELETED
@@ -1,342 +0,0 @@
|
|
1
|
-
SET search_path TO 'public';
|
2
|
-
|
3
|
-
CREATE TYPE signature;
|
4
|
-
|
5
|
-
-- basic i/o functions for signatures
|
6
|
-
|
7
|
-
CREATE OR REPLACE FUNCTION sig_in(cstring)
|
8
|
-
RETURNS signature
|
9
|
-
AS 'signature.so', 'sig_in'
|
10
|
-
LANGUAGE C STRICT;
|
11
|
-
|
12
|
-
CREATE OR REPLACE FUNCTION sig_out(signature)
|
13
|
-
RETURNS cstring
|
14
|
-
AS 'signature.so', 'sig_out'
|
15
|
-
LANGUAGE C STRICT;
|
16
|
-
|
17
|
-
-- signature postgresql type
|
18
|
-
|
19
|
-
CREATE TYPE signature (
|
20
|
-
INTERNALLENGTH = VARIABLE,
|
21
|
-
INPUT = sig_in,
|
22
|
-
OUTPUT = sig_out,
|
23
|
-
STORAGE = extended
|
24
|
-
);
|
25
|
-
|
26
|
-
-- functions for signatures
|
27
|
-
|
28
|
-
CREATE OR REPLACE FUNCTION sig_resize( signature, INT )
|
29
|
-
RETURNS signature
|
30
|
-
AS 'signature.so', 'sig_resize'
|
31
|
-
LANGUAGE C STRICT IMMUTABLE;
|
32
|
-
|
33
|
-
CREATE OR REPLACE FUNCTION sig_set( signature, INT, INT )
|
34
|
-
RETURNS signature
|
35
|
-
AS 'signature.so', 'sig_set'
|
36
|
-
LANGUAGE C STRICT IMMUTABLE;
|
37
|
-
|
38
|
-
CREATE OR REPLACE FUNCTION sig_set( signature, INT )
|
39
|
-
RETURNS signature
|
40
|
-
AS 'signature.so', 'sig_set'
|
41
|
-
LANGUAGE C STRICT IMMUTABLE;
|
42
|
-
|
43
|
-
CREATE OR REPLACE FUNCTION sig_get( signature, INT )
|
44
|
-
RETURNS INT
|
45
|
-
AS 'signature.so', 'sig_get'
|
46
|
-
LANGUAGE C STRICT IMMUTABLE;
|
47
|
-
|
48
|
-
CREATE OR REPLACE FUNCTION sig_length( signature )
|
49
|
-
RETURNS INT
|
50
|
-
AS 'signature.so', 'sig_length'
|
51
|
-
LANGUAGE C STRICT IMMUTABLE;
|
52
|
-
|
53
|
-
CREATE OR REPLACE FUNCTION sig_min( signature )
|
54
|
-
RETURNS INT
|
55
|
-
AS 'signature.so', 'sig_min'
|
56
|
-
LANGUAGE C STRICT IMMUTABLE;
|
57
|
-
|
58
|
-
CREATE OR REPLACE FUNCTION sig_and( signature, signature )
|
59
|
-
RETURNS signature
|
60
|
-
AS 'signature.so', 'sig_and'
|
61
|
-
LANGUAGE C STRICT IMMUTABLE;
|
62
|
-
|
63
|
-
CREATE OR REPLACE FUNCTION sig_or( signature, signature )
|
64
|
-
RETURNS signature
|
65
|
-
AS 'signature.so', 'sig_or'
|
66
|
-
LANGUAGE C STRICT IMMUTABLE;
|
67
|
-
|
68
|
-
CREATE OR REPLACE FUNCTION sig_xor( signature )
|
69
|
-
RETURNS signature
|
70
|
-
AS 'signature.so', 'sig_xor'
|
71
|
-
LANGUAGE C STRICT IMMUTABLE;
|
72
|
-
|
73
|
-
CREATE OR REPLACE FUNCTION count( signature )
|
74
|
-
RETURNS INT
|
75
|
-
AS 'signature.so', 'count'
|
76
|
-
LANGUAGE C STRICT IMMUTABLE;
|
77
|
-
|
78
|
-
CREATE OR REPLACE FUNCTION contains( signature, INT )
|
79
|
-
RETURNS BOOL
|
80
|
-
AS 'signature.so', 'contains'
|
81
|
-
LANGUAGE C STRICT IMMUTABLE;
|
82
|
-
|
83
|
-
CREATE OR REPLACE FUNCTION members( signature )
|
84
|
-
RETURNS SETOF INT
|
85
|
-
AS 'signature.so', 'members'
|
86
|
-
LANGUAGE C STRICT IMMUTABLE;
|
87
|
-
|
88
|
-
CREATE OR REPLACE FUNCTION sig_cmp( signature, signature )
|
89
|
-
RETURNS INT
|
90
|
-
AS 'signature.so', 'sig_cmp'
|
91
|
-
LANGUAGE C STRICT IMMUTABLE;
|
92
|
-
|
93
|
-
CREATE OR REPLACE FUNCTION sig_lt( signature, signature )
|
94
|
-
RETURNS BOOL
|
95
|
-
AS 'signature.so', 'sig_lt'
|
96
|
-
LANGUAGE C STRICT IMMUTABLE;
|
97
|
-
|
98
|
-
CREATE OR REPLACE FUNCTION sig_lte( signature, signature )
|
99
|
-
RETURNS BOOL
|
100
|
-
AS 'signature.so', 'sig_lte'
|
101
|
-
LANGUAGE C STRICT IMMUTABLE;
|
102
|
-
|
103
|
-
CREATE OR REPLACE FUNCTION sig_eq( signature, signature )
|
104
|
-
RETURNS BOOL
|
105
|
-
AS 'signature.so', 'sig_eq'
|
106
|
-
LANGUAGE C STRICT IMMUTABLE;
|
107
|
-
|
108
|
-
CREATE OR REPLACE FUNCTION sig_gt( signature, signature )
|
109
|
-
RETURNS BOOL
|
110
|
-
AS 'signature.so', 'sig_gt'
|
111
|
-
LANGUAGE C STRICT IMMUTABLE;
|
112
|
-
|
113
|
-
CREATE OR REPLACE FUNCTION sig_gte( signature, signature )
|
114
|
-
RETURNS BOOL
|
115
|
-
AS 'signature.so', 'sig_gte'
|
116
|
-
LANGUAGE C STRICT IMMUTABLE;
|
117
|
-
|
118
|
-
-- operators for signatures
|
119
|
-
|
120
|
-
CREATE OPERATOR & (
|
121
|
-
leftarg = signature,
|
122
|
-
rightarg = signature,
|
123
|
-
procedure = sig_and,
|
124
|
-
commutator = &
|
125
|
-
);
|
126
|
-
|
127
|
-
CREATE OPERATOR | (
|
128
|
-
leftarg = signature,
|
129
|
-
rightarg = signature,
|
130
|
-
procedure = sig_or,
|
131
|
-
commutator = |
|
132
|
-
);
|
133
|
-
|
134
|
-
CREATE OPERATOR + (
|
135
|
-
leftarg = signature,
|
136
|
-
rightarg = int,
|
137
|
-
procedure = sig_set
|
138
|
-
);
|
139
|
-
|
140
|
-
CREATE OPERATOR < (
|
141
|
-
leftarg = signature, rightarg = signature, procedure = sig_lt,
|
142
|
-
commutator = > , negator = >= ,
|
143
|
-
restrict = scalarltsel, join = scalarltjoinsel
|
144
|
-
);
|
145
|
-
|
146
|
-
CREATE OPERATOR <= (
|
147
|
-
leftarg = signature, rightarg = signature, procedure = sig_lte,
|
148
|
-
commutator = >= , negator = > ,
|
149
|
-
restrict = scalarltsel, join = scalarltjoinsel
|
150
|
-
);
|
151
|
-
|
152
|
-
CREATE OPERATOR = (
|
153
|
-
leftarg = signature, rightarg = signature, procedure = sig_eq,
|
154
|
-
commutator = = , negator = <> ,
|
155
|
-
restrict = eqsel, join = eqjoinsel
|
156
|
-
);
|
157
|
-
|
158
|
-
CREATE OPERATOR >= (
|
159
|
-
leftarg = signature, rightarg = signature, procedure = sig_gte,
|
160
|
-
commutator = <= , negator = < ,
|
161
|
-
restrict = scalargtsel, join = scalargtjoinsel
|
162
|
-
);
|
163
|
-
|
164
|
-
CREATE OPERATOR > (
|
165
|
-
leftarg = signature, rightarg = signature, procedure = sig_gt,
|
166
|
-
commutator = < , negator = <= ,
|
167
|
-
restrict = scalargtsel, join = scalargtjoinsel
|
168
|
-
);
|
169
|
-
|
170
|
-
-- index operator classes for signatures
|
171
|
-
|
172
|
-
CREATE OPERATOR CLASS signature_ops
|
173
|
-
DEFAULT FOR TYPE signature USING btree AS
|
174
|
-
OPERATOR 1 < ,
|
175
|
-
OPERATOR 2 <= ,
|
176
|
-
OPERATOR 3 = ,
|
177
|
-
OPERATOR 4 >= ,
|
178
|
-
OPERATOR 5 > ,
|
179
|
-
FUNCTION 1 sig_cmp(signature, signature);
|
180
|
-
|
181
|
-
-- aggregate functions for faceting
|
182
|
-
|
183
|
-
CREATE AGGREGATE signature( INT )
|
184
|
-
(
|
185
|
-
sfunc = sig_set,
|
186
|
-
stype = signature,
|
187
|
-
initcond = '0'
|
188
|
-
);
|
189
|
-
|
190
|
-
CREATE AGGREGATE collect( signature )
|
191
|
-
(
|
192
|
-
sfunc = sig_or,
|
193
|
-
stype = signature
|
194
|
-
);
|
195
|
-
|
196
|
-
CREATE AGGREGATE filter( signature )
|
197
|
-
(
|
198
|
-
sfunc = sig_and,
|
199
|
-
stype = signature
|
200
|
-
);
|
201
|
-
|
202
|
-
-- utility functions for maintaining facet indices
|
203
|
-
|
204
|
-
-- Utility function to drop and recreate a table, given an sql select statement
|
205
|
-
--
|
206
|
-
CREATE OR REPLACE FUNCTION recreate_table(tbl TEXT, select_expr TEXT) RETURNS VOID AS $$
|
207
|
-
BEGIN
|
208
|
-
SET client_min_messages = warning;
|
209
|
-
EXECUTE 'DROP TABLE IF EXISTS ' || quote_ident(tbl);
|
210
|
-
EXECUTE 'CREATE TABLE ' || quote_ident(tbl) || ' AS ' || select_expr;
|
211
|
-
RESET client_min_messages;
|
212
|
-
END;
|
213
|
-
$$ LANGUAGE plpgsql;
|
214
|
-
|
215
|
-
|
216
|
-
-- Utility function to add or update a packed id column on a table
|
217
|
-
--
|
218
|
-
-- If provided, the threshold indicates a percentage of acceptable wastage or "scatter"
|
219
|
-
-- in the ids, which keeps the packing algorithm from running until absolutely needed.
|
220
|
-
--
|
221
|
-
-- Because ids only become scattered when model rows are deleted, this means repacking
|
222
|
-
-- will occur very infrequently. The default threshold is 15%.
|
223
|
-
--
|
224
|
-
CREATE OR REPLACE FUNCTION renumber_table(tbl TEXT, col TEXT) RETURNS BOOLEAN AS $$
|
225
|
-
BEGIN
|
226
|
-
RETURN renumber_table(tbl, col, 0.15);
|
227
|
-
END;
|
228
|
-
$$ LANGUAGE plpgsql;
|
229
|
-
|
230
|
-
|
231
|
-
CREATE OR REPLACE FUNCTION renumber_table(tbl TEXT, col TEXT, threshold REAL) RETURNS BOOLEAN AS $$
|
232
|
-
DECLARE
|
233
|
-
seq TEXT;
|
234
|
-
wastage REAL;
|
235
|
-
renumber BOOLEAN;
|
236
|
-
BEGIN
|
237
|
-
seq = tbl || '_' || col || '_seq';
|
238
|
-
|
239
|
-
-- Drop numbered column if it already exists
|
240
|
-
SET client_min_messages = 'WARNING';
|
241
|
-
BEGIN
|
242
|
-
IF signature_wastage(tbl, col) <= threshold THEN
|
243
|
-
renumber := false;
|
244
|
-
ELSE
|
245
|
-
renumber := true;
|
246
|
-
EXECUTE 'DROP INDEX IF EXISTS ' || quote_ident(tbl || '_' || col || '_ndx');
|
247
|
-
EXECUTE 'ALTER TABLE ' || quote_ident(tbl) || ' DROP COLUMN ' || quote_ident(col);
|
248
|
-
EXECUTE 'DROP SEQUENCE IF EXISTS ' || quote_ident(seq);
|
249
|
-
END IF;
|
250
|
-
EXCEPTION
|
251
|
-
WHEN undefined_column THEN renumber := true;
|
252
|
-
END;
|
253
|
-
RESET client_min_messages;
|
254
|
-
|
255
|
-
-- Create numbered column & its index
|
256
|
-
IF renumber THEN
|
257
|
-
EXECUTE 'CREATE SEQUENCE ' || quote_ident(seq) || ' MINVALUE 0 ';
|
258
|
-
EXECUTE 'ALTER TABLE ' || quote_ident(tbl) || ' ADD COLUMN ' || quote_ident(col) || ' INT4 DEFAULT nextval(''' || quote_ident(seq) || ''')';
|
259
|
-
EXECUTE 'ALTER SEQUENCE ' || quote_ident(seq) || ' OWNED BY ' || quote_ident(tbl) || '.' || quote_ident(col);
|
260
|
-
EXECUTE 'CREATE INDEX ' || quote_ident(tbl || '_' || col || '_ndx') || ' ON ' || quote_ident(tbl) || '(' || col || ')';
|
261
|
-
END IF;
|
262
|
-
|
263
|
-
RETURN renumber;
|
264
|
-
END;
|
265
|
-
$$ LANGUAGE plpgsql;
|
266
|
-
|
267
|
-
|
268
|
-
-- Utility function to measure how many bits from a loosely-packed id column would be wasted,
|
269
|
-
-- if they were all collected into a bitset signature. Returns a float between 0 (no waste)
|
270
|
-
-- and 1.0 (all waste).
|
271
|
-
--
|
272
|
-
CREATE OR REPLACE FUNCTION signature_wastage(tbl TEXT, col TEXT) RETURNS REAL AS $$
|
273
|
-
DECLARE
|
274
|
-
max REAL;
|
275
|
-
count REAL;
|
276
|
-
BEGIN
|
277
|
-
EXECUTE 'SELECT count(*) FROM ' || quote_ident(tbl)
|
278
|
-
INTO count;
|
279
|
-
EXECUTE 'SELECT max(' || quote_ident(col) || ') FROM ' || quote_ident(tbl)
|
280
|
-
INTO max;
|
281
|
-
RETURN 1.0 - (count / (COALESCE(max, 0) + 1));
|
282
|
-
END;
|
283
|
-
$$ LANGUAGE plpgsql;
|
284
|
-
|
285
|
-
|
286
|
-
-- Utility function to identify columns for a nested facet index
|
287
|
-
--
|
288
|
-
CREATE OR REPLACE FUNCTION nest_levels(tbl TEXT) RETURNS SETOF TEXT AS $$
|
289
|
-
SELECT quote_ident(a.attname::TEXT)
|
290
|
-
FROM pg_attribute a LEFT JOIN pg_attrdef d ON a.attrelid = d.adrelid AND a.attnum = d.adnum
|
291
|
-
WHERE a.attrelid = $1::regclass
|
292
|
-
AND NOT a.attname IN ('signature', 'level')
|
293
|
-
AND a.attnum > 0 AND NOT a.attisdropped
|
294
|
-
ORDER BY a.attnum;
|
295
|
-
$$ LANGUAGE sql;
|
296
|
-
|
297
|
-
|
298
|
-
--Utility function to expand nesting in facet indices
|
299
|
-
--
|
300
|
-
-- Initially a facet index will include only leaves of the
|
301
|
-
-- nesting tree. This function adds all interior nodes
|
302
|
-
-- with their respective aggregate signatures, and adds a
|
303
|
-
-- postgresql index to the nested facet value.
|
304
|
-
--
|
305
|
-
-- e.g. given the nested facet values
|
306
|
-
-- {USA,Florida} '10'
|
307
|
-
-- {USA,Iowa} '01'
|
308
|
-
--
|
309
|
-
-- the function interpolates
|
310
|
-
-- {USA} '11'
|
311
|
-
--
|
312
|
-
-- N.B. expand_nesting may only be called once on a table
|
313
|
-
-- it refuses to add internal node duplicates
|
314
|
-
--
|
315
|
-
CREATE OR REPLACE FUNCTION expand_nesting(tbl TEXT) RETURNS VOID AS $$
|
316
|
-
DECLARE
|
317
|
-
cols TEXT[];
|
318
|
-
len INT;
|
319
|
-
aggr TEXT;
|
320
|
-
BEGIN
|
321
|
-
-- determine column names
|
322
|
-
SELECT array_agg(col) INTO cols FROM nest_levels(tbl) AS col;
|
323
|
-
len := array_length(cols, 1);
|
324
|
-
|
325
|
-
-- add unique index on facet value columns
|
326
|
-
aggr := array_to_string(cols, ', ');
|
327
|
-
EXECUTE 'CREATE UNIQUE INDEX ' || quote_ident(tbl) || '_ndx ON ' || quote_ident(tbl) || '(' || aggr || ')';
|
328
|
-
|
329
|
-
-- expand each level in turn
|
330
|
-
FOR i IN REVERSE (len-1)..1 LOOP
|
331
|
-
aggr := array_to_string(cols[1:i], ', ');
|
332
|
-
EXECUTE 'INSERT INTO ' || quote_ident(tbl) || '(' || aggr || ', level, signature)'
|
333
|
-
|| ' SELECT ' || aggr || ', ' || i || ' AS level, collect(signature)'
|
334
|
-
|| ' FROM ' || quote_ident(tbl)
|
335
|
-
|| ' GROUP BY ' || aggr;
|
336
|
-
END LOOP;
|
337
|
-
|
338
|
-
-- root node
|
339
|
-
EXECUTE 'INSERT INTO ' || quote_ident(tbl) || '(level, signature)'
|
340
|
-
|| ' SELECT 0 AS level, collect(signature) FROM ' || quote_ident(tbl);
|
341
|
-
END;
|
342
|
-
$$ LANGUAGE plpgsql;
|
data/ext/uninstall_signature.sql
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
module Repertoire
|
2
|
-
module Faceting
|
3
|
-
module AbstractAdapter #:nodoc:
|
4
|
-
|
5
|
-
# Returns the name of the facet index table for a given facet and model
|
6
|
-
def facet_table_name(model_name, name)
|
7
|
-
"_#{model_name}_#{name}_facet"
|
8
|
-
end
|
9
|
-
|
10
|
-
# Returns a list of the indexed facets on a model in the database (even
|
11
|
-
# if no longer declared on the ruby model)
|
12
|
-
def indexed_facets(model_name)
|
13
|
-
tables.grep(/_#{model_name}_(\w+)_facet/) { $1 }
|
14
|
-
end
|
15
|
-
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|