repertoire-faceting 0.5.5 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/FAQ +23 -17
- data/INSTALL +52 -84
- data/LICENSE +1 -1
- data/README +213 -34
- data/TODO +20 -7
- data/ext/Makefile +24 -14
- data/ext/README.faceting +51 -0
- data/ext/bytea/bytea.sql +173 -0
- data/ext/bytea/faceting_bytea.control +6 -0
- data/ext/common/util.sql +35 -0
- data/ext/faceting--0.6.0.sql +251 -0
- data/ext/faceting_bytea--0.6.0.sql +207 -0
- data/ext/faceting_varbit--0.6.0.sql +198 -0
- data/ext/signature/faceting.control +6 -0
- data/ext/signature/signature.c +740 -0
- data/ext/{signature.o → signature/signature.o} +0 -0
- data/ext/{signature.so → signature/signature.so} +0 -0
- data/ext/signature/signature.sql +217 -0
- data/ext/varbit/faceting_varbit.control +7 -0
- data/ext/varbit/varbit.sql +164 -0
- data/{public → lib/assets}/images/repertoire-faceting/proportional_symbol.png +0 -0
- data/{public → lib/assets}/images/repertoire-faceting/spinner_sm.gif +0 -0
- data/{public → lib/assets}/javascripts/rep.faceting/context.js +2 -2
- data/{public → lib/assets}/javascripts/rep.faceting/ext/earth_facet.js +2 -4
- data/{public → lib/assets}/javascripts/rep.faceting/facet.js +1 -1
- data/{public → lib/assets}/javascripts/rep.faceting/facet_widget.js +3 -8
- data/{public → lib/assets}/javascripts/rep.faceting/nested_facet.js +1 -1
- data/{public → lib/assets}/javascripts/rep.faceting/results.js +1 -1
- data/{public → lib/assets}/javascripts/rep.faceting.js +5 -1
- data/{public → lib/assets}/javascripts/rep.protovis-facets.js +3 -3
- data/lib/assets/javascripts/rep.widgets/events.js +51 -0
- data/lib/assets/javascripts/rep.widgets/global.js +50 -0
- data/lib/assets/javascripts/rep.widgets/model.js +159 -0
- data/lib/assets/javascripts/rep.widgets/widget.js +213 -0
- data/lib/assets/javascripts/rep.widgets.js +14 -0
- data/{public → lib/assets}/stylesheets/rep.faceting.css +1 -1
- data/lib/repertoire-faceting/adapters/postgresql_adapter.rb +107 -48
- data/lib/repertoire-faceting/facets/abstract_facet.rb +43 -27
- data/lib/repertoire-faceting/facets/basic_facet.rb +23 -22
- data/lib/repertoire-faceting/facets/nested_facet.rb +50 -27
- data/lib/repertoire-faceting/model.rb +101 -65
- data/lib/repertoire-faceting/rails/engine.rb +8 -0
- data/lib/repertoire-faceting/rails/postgresql_adapter.rb +0 -1
- data/lib/repertoire-faceting/rails/relation.rb +0 -1
- data/lib/repertoire-faceting/railtie.rb +0 -1
- data/lib/repertoire-faceting/relation/calculations.rb +7 -2
- data/lib/repertoire-faceting/relation/query_methods.rb +17 -4
- data/lib/repertoire-faceting/routing.rb +2 -5
- data/lib/repertoire-faceting/tasks/all.rake +5 -4
- data/lib/repertoire-faceting/tasks/client.rake +2 -5
- data/lib/repertoire-faceting/version.rb +1 -1
- data/lib/repertoire-faceting.rb +2 -4
- data/{public → vendor/assets}/javascripts/google-earth-extensions.js +0 -0
- data/{public → vendor/assets}/javascripts/protovis.js +0 -0
- metadata +78 -78
- data/ext/README.signature +0 -33
- data/ext/signature.c +0 -740
- data/ext/signature.sql +0 -342
- data/ext/signature.sql.IN +0 -342
- data/ext/uninstall_signature.sql +0 -4
- data/ext/uninstall_signature.sql.IN +0 -4
- data/lib/repertoire-faceting/adapters/abstract_adapter.rb +0 -18
- data/lib/repertoire-faceting/relation/spawn_methods.rb +0 -26
data/ext/bytea/bytea.sql
ADDED
@@ -0,0 +1,173 @@
|
|
1
|
+
-- ============================================================================
|
2
|
+
-- Faceting API implementing bitmap indices using PostgreSQL's built-in BYTEA
|
3
|
+
-- type, processed using plv8 typed arrays.
|
4
|
+
--
|
5
|
+
-- This API is suitable for deployment on Heroku, where plv8 is installed by
|
6
|
+
-- default. Performance is many times better than the VARBIT-based faceting
|
7
|
+
-- API, primarily because of optimisations in memory handling in the count
|
8
|
+
-- function.
|
9
|
+
--
|
10
|
+
-- See https://code.google.com/p/plv8js/wiki/PLV8
|
11
|
+
-- https://postgres.heroku.com/blog/past/2013/6/5/javascript_in_your_postgres/
|
12
|
+
--
|
13
|
+
-- Christopher York
|
14
|
+
-- MIT Hyperstudio
|
15
|
+
-- February 2014
|
16
|
+
-- ============================================================================
|
17
|
+
|
18
|
+
CREATE EXTENSION IF NOT EXISTS plv8;
|
19
|
+
|
20
|
+
SET bytea_output TO hex;
|
21
|
+
|
22
|
+
-- these functions are in pl/pgsql, because they involve appending bytea values,
|
23
|
+
-- which is easier done with direct access to the || operator
|
24
|
+
|
25
|
+
-- Grow a signature so that it can hold at least `bits` bits.
--
--   sig   the signature to resize
--   bits  number of bits the result must be able to address
--
-- Returns `sig` padded on the right with zero bytes when it is too short;
-- otherwise returns `sig` unchanged (signatures are never truncated).
CREATE FUNCTION @extschema@.sig_resize( sig BYTEA, bits INT ) RETURNS BYTEA AS $$
DECLARE
  len   INT := length(sig);             -- current size, in bytes
  bytes INT := ceil(bits / 8.0)::INT;   -- bytes needed to hold `bits` bits
BEGIN
  IF bytes > len THEN
    -- decode(..., 'hex') builds the zero padding directly as BYTEA, so the
    -- result does not depend on how a '\x' text literal is escaped or cast.
    RETURN sig || decode(repeat('00', bytes - len), 'hex');
  END IF;
  RETURN sig;
END $$ LANGUAGE plpgsql STRICT IMMUTABLE;
|
41
|
+
|
42
|
+
-- Set bit `pos` of `sig` to `val`, first growing the signature (via
-- sig_resize) so that position `pos` exists.
--
-- sig_resize is schema-qualified, like the call in the two-argument
-- sig_set, so the function still resolves when the extension schema is not
-- on the caller's search_path.
CREATE FUNCTION @extschema@.sig_set( sig BYTEA, pos INT, val INT) RETURNS BYTEA AS $$
BEGIN
  RETURN set_bit(@extschema@.sig_resize(sig, pos + 1), pos, val);
END $$ LANGUAGE plpgsql STRICT IMMUTABLE;
|
46
|
+
|
47
|
+
-- Two-argument convenience form: turn bit `pos` of `sig` on by delegating
-- to the three-argument sig_set with a value of 1.
CREATE FUNCTION @extschema@.sig_set( sig BYTEA, pos INT) RETURNS BYTEA AS $$
DECLARE
  updated BYTEA;
BEGIN
  updated := @extschema@.sig_set(sig, pos, 1);
  RETURN updated;
END $$ LANGUAGE plpgsql STRICT IMMUTABLE;
|
51
|
+
|
52
|
+
-- these functions are in javascript, because (1) pl/pgsql is close
|
53
|
+
-- to the worst language in the world; (2) plv8's typed arrays make
|
54
|
+
-- the count function much faster
|
55
|
+
|
56
|
+
-- Return the bit (0 or 1) stored at position `pos` of `sig`.
-- Bit 0 is the least significant bit of the first byte. Positions outside
-- the signature (negative, or at/after sig.length * 8) read as 0.
CREATE FUNCTION @extschema@.sig_get( sig BYTEA, pos INT ) RETURNS INT AS $$
  // Valid positions are 0 .. sig.length * 8 - 1; check the range explicitly
  // rather than relying on an out-of-range read coercing to 0.
  if (pos < 0 || pos >= sig.length * 8) {
    return 0;
  }
  return (sig[ Math.floor(pos / 8) ] >> (pos % 8)) & 1;
$$ LANGUAGE plv8 STRICT IMMUTABLE;
|
63
|
+
|
64
|
+
-- Report the size of a signature as the `length` of the value plv8 passes
-- in for the BYTEA argument (a count of bytes, not bits).
CREATE FUNCTION @extschema@.sig_length( sig BYTEA ) RETURNS INT AS $$
  var byte_count = sig.length;
  return byte_count;
$$ LANGUAGE plv8 STRICT IMMUTABLE;
|
67
|
+
|
68
|
+
-- Bitwise AND of two signatures. The result has the length of the shorter
-- argument: bytes present only in the longer one are dropped.
CREATE FUNCTION @extschema@.sig_and(sig1 BYTEA, sig2 BYTEA) RETURNS BYTEA AS $$
  // Accumulate into the shorter signature (sig1 when lengths are equal).
  var shorter = sig1, longer = sig2;
  if (sig2.length < sig1.length) {
    shorter = sig2;
    longer  = sig1;
  }
  var n = shorter.length;
  for (var k = 0; k < n; k++) {
    shorter[k] &= longer[k];
  }
  return shorter;
$$ LANGUAGE plv8 STRICT IMMUTABLE;
|
79
|
+
|
80
|
+
-- Bitwise OR of two signatures. The result has the length of the longer
-- argument: its trailing bytes are carried over unchanged.
CREATE FUNCTION @extschema@.sig_or(sig1 BYTEA, sig2 BYTEA) RETURNS BYTEA AS $$
  // Accumulate into the longer signature (sig1 when lengths are equal).
  var wide = sig1, narrow = sig2;
  if (sig2.length > sig1.length) {
    wide   = sig2;
    narrow = sig1;
  }
  var n = narrow.length;
  for (var k = 0; k < n; k++) {
    wide[k] |= narrow[k];
  }
  return wide;
$$ LANGUAGE plv8 STRICT IMMUTABLE;
|
91
|
+
|
92
|
+
-- Count the bits that are set in a signature.
CREATE FUNCTION @extschema@.count(sig bytea) RETURNS int4 AS $$
  // bits_in[b] is the number of 1 bits in byte value b (0..255). Each entry
  // is the entry for b with its lowest bit shifted out, plus that bit.
  var bits_in = [0];
  for (var b = 1; b < 256; b++) {
    bits_in[b] = bits_in[b >> 1] + (b & 1);
  }

  var total = 0;
  var n = sig.length;
  for (var k = 0; k < n; k++) {
    total += bits_in[ sig[k] ];
  }
  return total;
$$ LANGUAGE plv8 STRICT IMMUTABLE;
|
115
|
+
|
116
|
+
-- Test whether bit `pos` is set in `sig`.
-- Bit 0 is the least significant bit of the first byte. Positions outside
-- the signature (negative, or at/after sig.length * 8) are never members.
CREATE FUNCTION @extschema@.contains( sig BYTEA, pos INT ) RETURNS BOOL AS $$
  // Valid positions are 0 .. sig.length * 8 - 1.
  if (pos < 0 || pos >= sig.length * 8) {
    return false;
  }
  // Compare against 1 so that a real boolean is returned, not a number.
  return ((sig[ Math.floor(pos / 8) ] >> (pos % 8)) & 1) === 1;
$$ LANGUAGE plv8 STRICT IMMUTABLE;
|
119
|
+
|
120
|
+
-- Emit, in ascending order, the position of every bit that is set in `sig`.
-- Bit 0 is the least significant bit of the first byte.
CREATE FUNCTION @extschema@.members( sig BYTEA ) RETURNS SETOF INT AS $$
  var total_bits = sig.length * 8;
  for (var bit = 0; bit < total_bits; bit++) {
    var byte_value = sig[ Math.floor(bit / 8) ];
    if ((byte_value >> (bit % 8)) & 1) {
      plv8.return_next(bit);
    }
  }
$$ LANGUAGE plv8 STRICT IMMUTABLE;
|
129
|
+
|
130
|
+
|
131
|
+
-- operators for faceting
|
132
|
+
|
133
|
+
-- signature & signature : intersection (sig_and)
CREATE OPERATOR @extschema@.& (
    PROCEDURE  = @extschema@.sig_and,
    LEFTARG    = BYTEA,
    RIGHTARG   = BYTEA,
    COMMUTATOR = &
);

-- signature | signature : union (sig_or)
CREATE OPERATOR @extschema@.| (
    PROCEDURE  = @extschema@.sig_or,
    LEFTARG    = BYTEA,
    RIGHTARG   = BYTEA,
    COMMUTATOR = |
);

-- signature + int : signature with the given bit turned on (sig_set)
CREATE OPERATOR @extschema@.+ (
    PROCEDURE = @extschema@.sig_set,
    LEFTARG   = BYTEA,
    RIGHTARG  = int
);
|
152
|
+
|
153
|
+
|
154
|
+
-- aggregate functions for faceting
|
155
|
+
|
156
|
+
-- collect(signature): OR together every signature in the group
CREATE AGGREGATE @extschema@.collect( BYTEA )
(
    STYPE = BYTEA,
    SFUNC = @extschema@.sig_or
);

-- filter(signature): AND together every signature in the group
CREATE AGGREGATE @extschema@.filter( BYTEA )
(
    STYPE = BYTEA,
    SFUNC = @extschema@.sig_and
);

-- signature(int): starting from an empty signature, set one bit per input
CREATE AGGREGATE @extschema@.signature( INT )
(
    STYPE    = BYTEA,
    SFUNC    = @extschema@.sig_set,
    INITCOND = ''
);
|
data/ext/common/util.sql
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
-- ============================================================================
|
2
|
+
-- These functions are common to all bindings of the Repertoire faceting API
|
3
|
+
--
|
4
|
+
-- Christopher York
|
5
|
+
-- MIT Hyperstudio
|
6
|
+
-- February 2014
|
7
|
+
-- ============================================================================
|
8
|
+
|
9
|
+
|
10
|
+
-- Aggregator to measure how many bits from a loosely-packed id column would be wasted, if
|
11
|
+
-- they were all collected into a bitset signature. Returns a float between 0 (no waste)
|
12
|
+
-- and 1.0 (all waste). An example of its use:
|
13
|
+
--
|
14
|
+
-- SELECT wastage(id) FROM nobelists
|
15
|
+
-- =# 0.999999
|
16
|
+
--
|
17
|
+
-- ALTER TABLE nobelists ADD COLUMN _packed_id SERIAL
|
18
|
+
-- SELECT wastage(_packed_id) FROM nobelists
|
19
|
+
-- =# 0.015625
|
20
|
+
--
|
21
|
+
-- Final function for wastage(): turns the accumulated state
-- { number of ids seen, largest id seen } into the unused proportion
--   1 - count / (max + 1)
CREATE FUNCTION @extschema@.wastage_proportion(state INT[]) RETURNS double precision AS $$
  SELECT (1.0 - (state[1]::double precision / (COALESCE(state[2], 0.0) + 1.0)))
$$ LANGUAGE sql IMMUTABLE;

-- Transition function for wastage(): counts one more id and tracks the
-- largest id seen so far.
--
-- Declared STRICT so that the aggregate skips NULL ids. A NULL id can never
-- occupy a bit in a signature; counting it inflates the numerator and can
-- push the result below 0.
CREATE FUNCTION @extschema@.wastage_accum(state INT[], val INT) RETURNS INT[] AS $$
  SELECT ARRAY[ state[1] + 1, GREATEST(state[2], val) ];
$$ LANGUAGE sql IMMUTABLE STRICT;

-- wastage(id): proportion of signature bits that would be left unused if
-- all ids in the group were collected into one signature.
CREATE AGGREGATE @extschema@.wastage( INT )
(
  sfunc = @extschema@.wastage_accum,
  stype = INT[],
  finalfunc = @extschema@.wastage_proportion,
  initcond = '{0,0}'
);
|
@@ -0,0 +1,251 @@
|
|
1
|
+
-- ============================================================================
|
2
|
+
-- Faceting API implementing bitmap indices using a custom C datatype and
|
3
|
+
-- associated functions.
|
4
|
+
--
|
5
|
+
-- This API is to be preferred in all situations where it is possible to
|
6
|
+
-- build and install the datatype (requires superuser access to PostgreSQL)
|
7
|
+
--
|
8
|
+
-- Christopher York
|
9
|
+
-- MIT Hyperstudio
|
10
|
+
-- February 2014
|
11
|
+
-- ============================================================================
|
12
|
+
|
13
|
+
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION faceting" to load the default faceting API. \quit
|
15
|
+
|
16
|
+
-- functions for bitmap indices using datatype written in C
|
17
|
+
|
18
|
+
-- Shell type: lets the I/O functions below mention `signature` before the
-- full type definition exists.
CREATE TYPE @extschema@.signature;

-- basic i/o functions for signatures (C symbols in signature.so)

CREATE FUNCTION @extschema@.sig_in(cstring)
    RETURNS signature
    LANGUAGE C STRICT
    AS 'signature.so', 'sig_in';

CREATE FUNCTION @extschema@.sig_out(signature)
    RETURNS cstring
    LANGUAGE C STRICT
    AS 'signature.so', 'sig_out';

-- signature postgresql type: variable-length value using extended storage

CREATE TYPE @extschema@.signature (
    INPUT          = sig_in,
    OUTPUT         = sig_out,
    INTERNALLENGTH = VARIABLE,
    STORAGE        = extended
);
|
40
|
+
|
41
|
+
-- functions for signatures
|
42
|
+
|
43
|
+
-- Bit-level functions on signatures. Each one is a thin SQL binding to the
-- C symbol of the same name in signature.so.

CREATE FUNCTION @extschema@.sig_resize( signature, INT )
    RETURNS signature
    LANGUAGE C IMMUTABLE STRICT
    AS 'signature.so', 'sig_resize';

-- The three- and two-argument forms of sig_set bind to the same C symbol.
CREATE FUNCTION @extschema@.sig_set( signature, INT, INT )
    RETURNS signature
    LANGUAGE C IMMUTABLE STRICT
    AS 'signature.so', 'sig_set';

CREATE FUNCTION @extschema@.sig_set( signature, INT )
    RETURNS signature
    LANGUAGE C IMMUTABLE STRICT
    AS 'signature.so', 'sig_set';

CREATE FUNCTION @extschema@.sig_get( signature, INT )
    RETURNS INT
    LANGUAGE C IMMUTABLE STRICT
    AS 'signature.so', 'sig_get';

CREATE FUNCTION @extschema@.sig_length( signature )
    RETURNS INT
    LANGUAGE C IMMUTABLE STRICT
    AS 'signature.so', 'sig_length';

CREATE FUNCTION @extschema@.sig_min( signature )
    RETURNS INT
    LANGUAGE C IMMUTABLE STRICT
    AS 'signature.so', 'sig_min';

CREATE FUNCTION @extschema@.sig_and( signature, signature )
    RETURNS signature
    LANGUAGE C IMMUTABLE STRICT
    AS 'signature.so', 'sig_and';

CREATE FUNCTION @extschema@.sig_or( signature, signature )
    RETURNS signature
    LANGUAGE C IMMUTABLE STRICT
    AS 'signature.so', 'sig_or';

-- NOTE(review): sig_xor is declared with a single argument, unlike sig_and
-- and sig_or -- confirm against the C implementation.
CREATE FUNCTION @extschema@.sig_xor( signature )
    RETURNS signature
    LANGUAGE C IMMUTABLE STRICT
    AS 'signature.so', 'sig_xor';
|
87
|
+
|
88
|
+
-- Set-style queries on signatures; SQL bindings to C symbols in signature.so.

CREATE FUNCTION @extschema@.count( signature )
    RETURNS INT
    LANGUAGE C IMMUTABLE STRICT
    AS 'signature.so', 'count';

CREATE FUNCTION @extschema@.contains( signature, INT )
    RETURNS BOOL
    LANGUAGE C IMMUTABLE STRICT
    AS 'signature.so', 'contains';

CREATE FUNCTION @extschema@.members( signature )
    RETURNS SETOF INT
    LANGUAGE C IMMUTABLE STRICT
    AS 'signature.so', 'members';
|
102
|
+
|
103
|
+
-- Comparison support for signatures; SQL bindings to C symbols in
-- signature.so. sig_cmp returns INT, the other five return BOOL.

CREATE FUNCTION @extschema@.sig_cmp( signature, signature )
    RETURNS INT
    LANGUAGE C IMMUTABLE STRICT
    AS 'signature.so', 'sig_cmp';

CREATE FUNCTION @extschema@.sig_lt( signature, signature )
    RETURNS BOOL
    LANGUAGE C IMMUTABLE STRICT
    AS 'signature.so', 'sig_lt';

CREATE FUNCTION @extschema@.sig_lte( signature, signature )
    RETURNS BOOL
    LANGUAGE C IMMUTABLE STRICT
    AS 'signature.so', 'sig_lte';

CREATE FUNCTION @extschema@.sig_eq( signature, signature )
    RETURNS BOOL
    LANGUAGE C IMMUTABLE STRICT
    AS 'signature.so', 'sig_eq';

CREATE FUNCTION @extschema@.sig_gt( signature, signature )
    RETURNS BOOL
    LANGUAGE C IMMUTABLE STRICT
    AS 'signature.so', 'sig_gt';

CREATE FUNCTION @extschema@.sig_gte( signature, signature )
    RETURNS BOOL
    LANGUAGE C IMMUTABLE STRICT
    AS 'signature.so', 'sig_gte';
|
132
|
+
|
133
|
+
|
134
|
+
-- operators for signatures
|
135
|
+
|
136
|
+
-- signature & signature : sig_and
CREATE OPERATOR @extschema@.& (
    PROCEDURE  = @extschema@.sig_and,
    LEFTARG    = signature,
    RIGHTARG   = signature,
    COMMUTATOR = &
);

-- signature | signature : sig_or
CREATE OPERATOR @extschema@.| (
    PROCEDURE  = @extschema@.sig_or,
    LEFTARG    = signature,
    RIGHTARG   = signature,
    COMMUTATOR = |
);

-- signature + int : sig_set
CREATE OPERATOR @extschema@.+ (
    PROCEDURE = @extschema@.sig_set,
    LEFTARG   = signature,
    RIGHTARG  = int
);
|
155
|
+
|
156
|
+
-- Ordering operators for signatures, used by the btree operator class.
-- All five are created in the extension schema and bind to schema-qualified
-- procedures, so each commutator/negator pair lives in the same schema.

CREATE OPERATOR @extschema@.< (
   leftarg = signature, rightarg = signature, procedure = @extschema@.sig_lt,
   commutator = > , negator = >= ,
   restrict = scalarltsel, join = scalarltjoinsel
);

CREATE OPERATOR @extschema@.<= (
   leftarg = signature, rightarg = signature, procedure = @extschema@.sig_lte,
   commutator = >= , negator = > ,
   restrict = scalarltsel, join = scalarltjoinsel
);

CREATE OPERATOR @extschema@.= (
   leftarg = signature, rightarg = signature, procedure = @extschema@.sig_eq,
   commutator = = , negator = <> ,
   restrict = eqsel, join = eqjoinsel
);

-- Previously the only operator created without the @extschema@ qualifier.
CREATE OPERATOR @extschema@.>= (
   leftarg = signature, rightarg = signature, procedure = @extschema@.sig_gte,
   commutator = <= , negator = < ,
   restrict = scalargtsel, join = scalargtjoinsel
);

CREATE OPERATOR @extschema@.> (
   leftarg = signature, rightarg = signature, procedure = @extschema@.sig_gt,
   commutator = < , negator = <= ,
   restrict = scalargtsel, join = scalargtjoinsel
);
|
185
|
+
|
186
|
+
-- index operator classes for signatures
|
187
|
+
|
188
|
+
-- Default btree operator class for signatures: the five ordering operators
-- in btree strategy slots 1-5, with sig_cmp as support function 1.
CREATE OPERATOR CLASS @extschema@.signature_ops
DEFAULT FOR TYPE signature USING btree AS
    OPERATOR 1 <  ,   -- less than
    OPERATOR 2 <= ,   -- less than or equal
    OPERATOR 3 =  ,   -- equal
    OPERATOR 4 >= ,   -- greater than or equal
    OPERATOR 5 >  ,   -- greater than
    FUNCTION 1 sig_cmp(signature, signature);
|
196
|
+
|
197
|
+
|
198
|
+
-- aggregate functions for faceting
|
199
|
+
|
200
|
+
-- collect(signature): fold the group with sig_or
CREATE AGGREGATE @extschema@.collect( signature )
(
    STYPE = signature,
    SFUNC = @extschema@.sig_or
);

-- filter(signature): fold the group with sig_and
CREATE AGGREGATE @extschema@.filter( signature )
(
    STYPE = signature,
    SFUNC = @extschema@.sig_and
);

-- signature(int): fold the group with sig_set, starting from '0'
CREATE AGGREGATE @extschema@.signature( INT )
(
    STYPE    = signature,
    SFUNC    = @extschema@.sig_set,
    INITCOND = '0'
);

-- ============================================================================
|
218
|
+
-- These functions are common to all bindings of the Repertoire faceting API
|
219
|
+
--
|
220
|
+
-- Christopher York
|
221
|
+
-- MIT Hyperstudio
|
222
|
+
-- February 2014
|
223
|
+
-- ============================================================================
|
224
|
+
|
225
|
+
|
226
|
+
-- Aggregator to measure how many bits from a loosely-packed id column would be wasted, if
|
227
|
+
-- they were all collected into a bitset signature. Returns a float between 0 (no waste)
|
228
|
+
-- and 1.0 (all waste). An example of its use:
|
229
|
+
--
|
230
|
+
-- SELECT wastage(id) FROM nobelists
|
231
|
+
-- =# 0.999999
|
232
|
+
--
|
233
|
+
-- ALTER TABLE nobelists ADD COLUMN _packed_id SERIAL
|
234
|
+
-- SELECT wastage(_packed_id) FROM nobelists
|
235
|
+
-- =# 0.015625
|
236
|
+
--
|
237
|
+
-- Final function for wastage(): turns the accumulated state
-- { number of ids seen, largest id seen } into the unused proportion
--   1 - count / (max + 1)
CREATE FUNCTION @extschema@.wastage_proportion(state INT[]) RETURNS double precision AS $$
  SELECT (1.0 - (state[1]::double precision / (COALESCE(state[2], 0.0) + 1.0)))
$$ LANGUAGE sql IMMUTABLE;

-- Transition function for wastage(): counts one more id and tracks the
-- largest id seen so far.
--
-- Declared STRICT so that the aggregate skips NULL ids. A NULL id can never
-- occupy a bit in a signature; counting it inflates the numerator and can
-- push the result below 0.
CREATE FUNCTION @extschema@.wastage_accum(state INT[], val INT) RETURNS INT[] AS $$
  SELECT ARRAY[ state[1] + 1, GREATEST(state[2], val) ];
$$ LANGUAGE sql IMMUTABLE STRICT;

-- wastage(id): proportion of signature bits that would be left unused if
-- all ids in the group were collected into one signature.
CREATE AGGREGATE @extschema@.wastage( INT )
(
  sfunc = @extschema@.wastage_accum,
  stype = INT[],
  finalfunc = @extschema@.wastage_proportion,
  initcond = '{0,0}'
);
|
@@ -0,0 +1,207 @@
|
|
1
|
+
-- ============================================================================
|
2
|
+
-- Faceting API implementing bitmap indices using PostgreSQL's built-in BYTEA
|
3
|
+
-- type, processed using plv8 typed arrays.
|
4
|
+
--
|
5
|
+
-- This API is suitable for deployment on Heroku, where plv8 is installed by
|
6
|
+
-- default. Performance is many times better than the VARBIT-based faceting
|
7
|
+
-- API, primarily because of optimisations in memory handling in the count
|
8
|
+
-- function.
|
9
|
+
--
|
10
|
+
-- See https://code.google.com/p/plv8js/wiki/PLV8
|
11
|
+
-- https://postgres.heroku.com/blog/past/2013/6/5/javascript_in_your_postgres/
|
12
|
+
--
|
13
|
+
-- Christopher York
|
14
|
+
-- MIT Hyperstudio
|
15
|
+
-- February 2014
|
16
|
+
-- ============================================================================
|
17
|
+
|
18
|
+
CREATE EXTENSION IF NOT EXISTS plv8;
|
19
|
+
|
20
|
+
SET bytea_output TO hex;
|
21
|
+
|
22
|
+
-- these functions are in pl/pgsql, because they involve appending bytea values,
|
23
|
+
-- which is easier done with direct access to the || operator
|
24
|
+
|
25
|
+
-- Grow a signature so that it can hold at least `bits` bits.
--
--   sig   the signature to resize
--   bits  number of bits the result must be able to address
--
-- Returns `sig` padded on the right with zero bytes when it is too short;
-- otherwise returns `sig` unchanged (signatures are never truncated).
CREATE FUNCTION @extschema@.sig_resize( sig BYTEA, bits INT ) RETURNS BYTEA AS $$
DECLARE
  len   INT := length(sig);             -- current size, in bytes
  bytes INT := ceil(bits / 8.0)::INT;   -- bytes needed to hold `bits` bits
BEGIN
  IF bytes > len THEN
    -- decode(..., 'hex') builds the zero padding directly as BYTEA, so the
    -- result does not depend on how a '\x' text literal is escaped or cast.
    RETURN sig || decode(repeat('00', bytes - len), 'hex');
  END IF;
  RETURN sig;
END $$ LANGUAGE plpgsql STRICT IMMUTABLE;

-- Set bit `pos` of `sig` to `val`, first growing the signature so that
-- position `pos` exists. sig_resize is schema-qualified so the call still
-- resolves when the extension schema is not on the caller's search_path.
CREATE FUNCTION @extschema@.sig_set( sig BYTEA, pos INT, val INT) RETURNS BYTEA AS $$
BEGIN
  RETURN set_bit(@extschema@.sig_resize(sig, pos + 1), pos, val);
END $$ LANGUAGE plpgsql STRICT IMMUTABLE;

-- Two-argument convenience form: turn bit `pos` on.
CREATE FUNCTION @extschema@.sig_set( sig BYTEA, pos INT) RETURNS BYTEA AS $$
BEGIN
  RETURN @extschema@.sig_set(sig, pos, 1);
END $$ LANGUAGE plpgsql STRICT IMMUTABLE;
|
51
|
+
|
52
|
+
-- these functions are in javascript, because (1) pl/pgsql is close
|
53
|
+
-- to the worst language in the world; (2) plv8's typed arrays make
|
54
|
+
-- the count function much faster
|
55
|
+
|
56
|
+
-- Return the bit (0 or 1) stored at position `pos` of `sig`.
-- Bit 0 is the least significant bit of the first byte. Positions outside
-- the signature (negative, or at/after sig.length * 8) read as 0.
CREATE FUNCTION @extschema@.sig_get( sig BYTEA, pos INT ) RETURNS INT AS $$
  // Valid positions are 0 .. sig.length * 8 - 1; check the range explicitly
  // rather than relying on an out-of-range read coercing to 0.
  if (pos < 0 || pos >= sig.length * 8) {
    return 0;
  }
  return (sig[ Math.floor(pos / 8) ] >> (pos % 8)) & 1;
$$ LANGUAGE plv8 STRICT IMMUTABLE;

-- Size of a signature: the `length` of the value plv8 passes in for the
-- BYTEA argument (a count of bytes, not bits).
CREATE FUNCTION @extschema@.sig_length( sig BYTEA ) RETURNS INT AS $$
  return sig.length;
$$ LANGUAGE plv8 STRICT IMMUTABLE;

-- Bitwise AND of two signatures. The result has the length of the shorter
-- argument: bytes present only in the longer one are dropped.
CREATE FUNCTION @extschema@.sig_and(sig1 BYTEA, sig2 BYTEA) RETURNS BYTEA AS $$
  // Make sig1 the shorter signature and accumulate into it.
  if (sig2.length < sig1.length) {
    var tmp = sig1;
    sig1 = sig2;
    sig2 = tmp;
  }
  for (var i = 0; i < sig1.length; i++) {
    sig1[i] = sig1[i] & sig2[i];
  }
  return sig1;
$$ LANGUAGE plv8 STRICT IMMUTABLE;

-- Bitwise OR of two signatures. The result has the length of the longer
-- argument: its trailing bytes are carried over unchanged.
CREATE FUNCTION @extschema@.sig_or(sig1 BYTEA, sig2 BYTEA) RETURNS BYTEA AS $$
  // Make sig1 the longer signature and accumulate into it.
  if (sig2.length > sig1.length) {
    var tmp = sig1;
    sig1 = sig2;
    sig2 = tmp;
  }
  for (var i = 0; i < sig2.length; i++) {
    sig1[i] = sig1[i] | sig2[i];
  }
  return sig1;
$$ LANGUAGE plv8 STRICT IMMUTABLE;

-- Count the bits that are set in a signature.
CREATE FUNCTION @extschema@.count(sig bytea) RETURNS int4 AS $$
  // count_table[b] is the number of 1 bits in byte value b (0..255). Each
  // entry is the entry for b with its lowest bit shifted out, plus that bit.
  var count_table = [0];
  for (var b = 1; b < 256; b++) {
    count_table[b] = count_table[b >> 1] + (b & 1);
  }
  var count = 0;
  for (var i = 0; i < sig.length; i++) { count += count_table[ sig[i] ]; }
  return count;
$$ LANGUAGE plv8 STRICT IMMUTABLE;

-- Test whether bit `pos` is set in `sig`. Positions outside the signature
-- (negative, or at/after sig.length * 8) are never members.
CREATE FUNCTION @extschema@.contains( sig BYTEA, pos INT ) RETURNS BOOL AS $$
  if (pos < 0 || pos >= sig.length * 8) {
    return false;
  }
  // Compare against 1 so that a real boolean is returned, not a number.
  return ((sig[ Math.floor(pos / 8) ] >> (pos % 8)) & 1) === 1;
$$ LANGUAGE plv8 STRICT IMMUTABLE;

-- Emit, in ascending order, the position of every bit that is set in `sig`.
CREATE FUNCTION @extschema@.members( sig BYTEA ) RETURNS SETOF INT AS $$
  for (var i = 0; i < sig.length; i++) {
    for (var j = 0; j < 8; j++) {
      if (sig[i] >> j & 1) {
        plv8.return_next(i * 8 + j);
      }
    }
  }
$$ LANGUAGE plv8 STRICT IMMUTABLE;
|
129
|
+
|
130
|
+
|
131
|
+
-- operators for faceting
|
132
|
+
|
133
|
+
-- signature & signature : intersection (sig_and)
CREATE OPERATOR @extschema@.& (
    PROCEDURE  = @extschema@.sig_and,
    LEFTARG    = BYTEA,
    RIGHTARG   = BYTEA,
    COMMUTATOR = &
);

-- signature | signature : union (sig_or)
CREATE OPERATOR @extschema@.| (
    PROCEDURE  = @extschema@.sig_or,
    LEFTARG    = BYTEA,
    RIGHTARG   = BYTEA,
    COMMUTATOR = |
);

-- signature + int : signature with the given bit turned on (sig_set)
CREATE OPERATOR @extschema@.+ (
    PROCEDURE = @extschema@.sig_set,
    LEFTARG   = BYTEA,
    RIGHTARG  = int
);
|
152
|
+
|
153
|
+
|
154
|
+
-- aggregate functions for faceting
|
155
|
+
|
156
|
+
-- collect(signature): OR together every signature in the group
CREATE AGGREGATE @extschema@.collect( BYTEA )
(
    STYPE = BYTEA,
    SFUNC = @extschema@.sig_or
);

-- filter(signature): AND together every signature in the group
CREATE AGGREGATE @extschema@.filter( BYTEA )
(
    STYPE = BYTEA,
    SFUNC = @extschema@.sig_and
);

-- signature(int): starting from an empty signature, set one bit per input
CREATE AGGREGATE @extschema@.signature( INT )
(
    STYPE    = BYTEA,
    SFUNC    = @extschema@.sig_set,
    INITCOND = ''
);

-- ============================================================================
|
174
|
+
-- These functions are common to all bindings of the Repertoire faceting API
|
175
|
+
--
|
176
|
+
-- Christopher York
|
177
|
+
-- MIT Hyperstudio
|
178
|
+
-- February 2014
|
179
|
+
-- ============================================================================
|
180
|
+
|
181
|
+
|
182
|
+
-- Aggregator to measure how many bits from a loosely-packed id column would be wasted, if
|
183
|
+
-- they were all collected into a bitset signature. Returns a float between 0 (no waste)
|
184
|
+
-- and 1.0 (all waste). An example of its use:
|
185
|
+
--
|
186
|
+
-- SELECT wastage(id) FROM nobelists
|
187
|
+
-- =# 0.999999
|
188
|
+
--
|
189
|
+
-- ALTER TABLE nobelists ADD COLUMN _packed_id SERIAL
|
190
|
+
-- SELECT wastage(_packed_id) FROM nobelists
|
191
|
+
-- =# 0.015625
|
192
|
+
--
|
193
|
+
-- Final function for wastage(): turns the accumulated state
-- { number of ids seen, largest id seen } into the unused proportion
--   1 - count / (max + 1)
CREATE FUNCTION @extschema@.wastage_proportion(state INT[]) RETURNS double precision AS $$
  SELECT (1.0 - (state[1]::double precision / (COALESCE(state[2], 0.0) + 1.0)))
$$ LANGUAGE sql IMMUTABLE;

-- Transition function for wastage(): counts one more id and tracks the
-- largest id seen so far.
--
-- Declared STRICT so that the aggregate skips NULL ids. A NULL id can never
-- occupy a bit in a signature; counting it inflates the numerator and can
-- push the result below 0.
CREATE FUNCTION @extschema@.wastage_accum(state INT[], val INT) RETURNS INT[] AS $$
  SELECT ARRAY[ state[1] + 1, GREATEST(state[2], val) ];
$$ LANGUAGE sql IMMUTABLE STRICT;

-- wastage(id): proportion of signature bits that would be left unused if
-- all ids in the group were collected into one signature.
CREATE AGGREGATE @extschema@.wastage( INT )
(
  sfunc = @extschema@.wastage_accum,
  stype = INT[],
  finalfunc = @extschema@.wastage_proportion,
  initcond = '{0,0}'
);
|