repertoire-faceting 0.5.5 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. checksums.yaml +4 -4
  2. data/FAQ +23 -17
  3. data/INSTALL +52 -84
  4. data/LICENSE +1 -1
  5. data/README +213 -34
  6. data/TODO +20 -7
  7. data/ext/Makefile +24 -14
  8. data/ext/README.faceting +51 -0
  9. data/ext/bytea/bytea.sql +173 -0
  10. data/ext/bytea/faceting_bytea.control +6 -0
  11. data/ext/common/util.sql +35 -0
  12. data/ext/faceting--0.6.0.sql +251 -0
  13. data/ext/faceting_bytea--0.6.0.sql +207 -0
  14. data/ext/faceting_varbit--0.6.0.sql +198 -0
  15. data/ext/signature/faceting.control +6 -0
  16. data/ext/signature/signature.c +740 -0
  17. data/ext/{signature.o → signature/signature.o} +0 -0
  18. data/ext/{signature.so → signature/signature.so} +0 -0
  19. data/ext/signature/signature.sql +217 -0
  20. data/ext/varbit/faceting_varbit.control +7 -0
  21. data/ext/varbit/varbit.sql +164 -0
  22. data/{public → lib/assets}/images/repertoire-faceting/proportional_symbol.png +0 -0
  23. data/{public → lib/assets}/images/repertoire-faceting/spinner_sm.gif +0 -0
  24. data/{public → lib/assets}/javascripts/rep.faceting/context.js +2 -2
  25. data/{public → lib/assets}/javascripts/rep.faceting/ext/earth_facet.js +2 -4
  26. data/{public → lib/assets}/javascripts/rep.faceting/facet.js +1 -1
  27. data/{public → lib/assets}/javascripts/rep.faceting/facet_widget.js +3 -8
  28. data/{public → lib/assets}/javascripts/rep.faceting/nested_facet.js +1 -1
  29. data/{public → lib/assets}/javascripts/rep.faceting/results.js +1 -1
  30. data/{public → lib/assets}/javascripts/rep.faceting.js +5 -1
  31. data/{public → lib/assets}/javascripts/rep.protovis-facets.js +3 -3
  32. data/lib/assets/javascripts/rep.widgets/events.js +51 -0
  33. data/lib/assets/javascripts/rep.widgets/global.js +50 -0
  34. data/lib/assets/javascripts/rep.widgets/model.js +159 -0
  35. data/lib/assets/javascripts/rep.widgets/widget.js +213 -0
  36. data/lib/assets/javascripts/rep.widgets.js +14 -0
  37. data/{public → lib/assets}/stylesheets/rep.faceting.css +1 -1
  38. data/lib/repertoire-faceting/adapters/postgresql_adapter.rb +107 -48
  39. data/lib/repertoire-faceting/facets/abstract_facet.rb +43 -27
  40. data/lib/repertoire-faceting/facets/basic_facet.rb +23 -22
  41. data/lib/repertoire-faceting/facets/nested_facet.rb +50 -27
  42. data/lib/repertoire-faceting/model.rb +101 -65
  43. data/lib/repertoire-faceting/rails/engine.rb +8 -0
  44. data/lib/repertoire-faceting/rails/postgresql_adapter.rb +0 -1
  45. data/lib/repertoire-faceting/rails/relation.rb +0 -1
  46. data/lib/repertoire-faceting/railtie.rb +0 -1
  47. data/lib/repertoire-faceting/relation/calculations.rb +7 -2
  48. data/lib/repertoire-faceting/relation/query_methods.rb +17 -4
  49. data/lib/repertoire-faceting/routing.rb +2 -5
  50. data/lib/repertoire-faceting/tasks/all.rake +5 -4
  51. data/lib/repertoire-faceting/tasks/client.rake +2 -5
  52. data/lib/repertoire-faceting/version.rb +1 -1
  53. data/lib/repertoire-faceting.rb +2 -4
  54. data/{public → vendor/assets}/javascripts/google-earth-extensions.js +0 -0
  55. data/{public → vendor/assets}/javascripts/protovis.js +0 -0
  56. metadata +78 -78
  57. data/ext/README.signature +0 -33
  58. data/ext/signature.c +0 -740
  59. data/ext/signature.sql +0 -342
  60. data/ext/signature.sql.IN +0 -342
  61. data/ext/uninstall_signature.sql +0 -4
  62. data/ext/uninstall_signature.sql.IN +0 -4
  63. data/lib/repertoire-faceting/adapters/abstract_adapter.rb +0 -18
  64. data/lib/repertoire-faceting/relation/spawn_methods.rb +0 -26
@@ -0,0 +1,198 @@
1
+ -- ============================================================================
2
+ -- Faceting API implementing bitmap indices using PostgreSQL's built-in VARBIT
3
+ -- type, processed using the built-in language pl/pgsql.
4
+ --
5
+ -- This API is suitable for deployment on any host, since it requires no
6
+ -- PostgreSQL extensions outside the default install.
7
+ --
8
+ -- However, performance is limited to around 30,000 items in practice (in part
9
+ -- because of unnecessary duplication of varbit values when pl/pgsql evaluates
10
+ -- the count function.)
11
+ --
12
+ -- The 'signature' C-based faceting API is preferable for any install where
13
+ -- you have superuser access to the database.
14
+ --
15
+ -- Christopher York
16
+ -- MIT Hyperstudio
17
+ -- February 2014
18
+ -- ============================================================================
19
+
20
-- Force a signature to an exact width.
--
--   sig  : bit string to resize
--   bits : requested width, in bits
--
-- A signature shorter than `bits` is padded on the right with '0' bits; a
-- longer one is cut down to its first `bits` bits; one that already has the
-- requested width is returned as-is.
CREATE FUNCTION @extschema@.sig_resize( sig VARBIT, bits INT ) RETURNS VARBIT AS $$
  DECLARE
    current_bits CONSTANT INT := length(sig);
  BEGIN
    IF bits = current_bits THEN
      RETURN sig;
    END IF;

    IF bits < current_bits THEN
      -- keep only the leading `bits` positions
      RETURN substring(sig FROM 1 FOR bits);
    END IF;

    -- bits > current_bits: append the missing positions as zeroes
    RETURN sig || repeat('0', bits - current_bits)::VARBIT;
  END $$ LANGUAGE plpgsql STRICT IMMUTABLE;
32
+
33
-- Write one bit of a signature, growing the signature when necessary.
--
--   sig : bit string to modify
--   pos : 0-based bit position
--   val : bit value to store
--
-- Inside the current width the bit is written with set_bit(). Past the end,
-- a positive `val` first widens the signature to pos+1 bits and then sets the
-- bit to 1; any other `val` returns the signature untouched, so clearing a
-- bit that lies outside the signature never makes it longer.
CREATE FUNCTION @extschema@.sig_set( sig VARBIT, pos INT, val INT) RETURNS VARBIT AS $$
  DECLARE
    width CONSTANT INT := length(sig);
  BEGIN
    IF pos < width THEN
      RETURN set_bit(sig, pos, val);
    END IF;

    IF val > 0 THEN
      RETURN set_bit(@extschema@.sig_resize(sig, pos+1), pos, 1);
    END IF;

    RETURN sig;
  END $$ LANGUAGE plpgsql STRICT IMMUTABLE;
48
+
49
-- Two-argument convenience form of sig_set: turn the bit at `pos` on.
--
--   sig : bit string to modify
--   pos : 0-based bit position to set to 1
--
-- Delegates to the three-argument sig_set with a value of 1.
CREATE FUNCTION @extschema@.sig_set( sig VARBIT, pos INT) RETURNS VARBIT AS $$
  DECLARE
    bit_on CONSTANT INT := 1;
  BEGIN
    RETURN @extschema@.sig_set(sig, pos, bit_on);
  END $$ LANGUAGE plpgsql STRICT IMMUTABLE;
53
+
54
-- Read one bit of a signature.
--
--   sig : bit string to inspect
--   pos : 0-based bit position
--
-- Returns the stored bit (0 or 1) for positions inside the signature, and 0
-- for any position at or beyond its length.
CREATE FUNCTION @extschema@.sig_get( sig VARBIT, pos INT ) RETURNS INT AS $$
  DECLARE
    width CONSTANT INT := length(sig);
  BEGIN
    IF pos < width THEN
      RETURN get_bit(sig, pos);
    END IF;

    -- positions past the end read as unset
    RETURN 0;
  END $$ LANGUAGE plpgsql STRICT IMMUTABLE;
65
+
66
-- Width of a signature, in bits (set and unset positions alike).
--
--   sig : bit string to measure
CREATE FUNCTION @extschema@.sig_length( sig VARBIT ) RETURNS INT AS $$
  DECLARE
    width INT;
  BEGIN
    width := length(sig);
    RETURN width;
  END $$ LANGUAGE plpgsql STRICT IMMUTABLE;
70
+
71
-- 0-based position of the first bit set in a signature.
--
--   sig : bit string to search
--
-- position() counts from 1 and yields 0 when nothing matches, so the result
-- is shifted down by one; a signature with no bit set therefore gives -1.
CREATE FUNCTION @extschema@.sig_min( sig VARBIT ) RETURNS INT AS $$
  DECLARE
    first_one INT;
  BEGIN
    first_one := position('1' in sig);
    RETURN first_one - 1;
  END $$ LANGUAGE plpgsql STRICT IMMUTABLE;
75
+
76
-- Bitwise AND (intersection) of two signatures of possibly different widths.
--
--   sig1, sig2 : bit strings to combine
--
-- Both operands are first brought to the width of the longer one with
-- sig_resize, then combined with bitand().
CREATE FUNCTION @extschema@.sig_and( sig1 VARBIT, sig2 VARBIT ) RETURNS VARBIT AS $$
  DECLARE
    width CONSTANT INT := GREATEST(length(sig1), length(sig2));
    lhs   VARBIT;
    rhs   VARBIT;
  BEGIN
    lhs := @extschema@.sig_resize(sig1, width);
    rhs := @extschema@.sig_resize(sig2, width);
    RETURN bitand(lhs, rhs);
  END $$ LANGUAGE plpgsql STRICT IMMUTABLE;
83
+
84
-- Bitwise OR (union) of two signatures of possibly different widths.
--
--   sig1, sig2 : bit strings to combine
--
-- Both operands are first brought to the width of the longer one with
-- sig_resize, then combined with bitor().
CREATE FUNCTION @extschema@.sig_or( sig1 VARBIT, sig2 VARBIT ) RETURNS VARBIT AS $$
  DECLARE
    width CONSTANT INT := GREATEST(length(sig1), length(sig2));
    lhs   VARBIT;
    rhs   VARBIT;
  BEGIN
    lhs := @extschema@.sig_resize(sig1, width);
    rhs := @extschema@.sig_resize(sig2, width);
    RETURN bitor(lhs, rhs);
  END $$ LANGUAGE plpgsql STRICT IMMUTABLE;
91
+
92
-- Bitwise XOR (symmetric difference) of two signatures of possibly different
-- widths.
--
--   sig1, sig2 : bit strings to combine
--
-- Both operands are first brought to the width of the longer one with
-- sig_resize, then combined with bitxor().
CREATE FUNCTION @extschema@.sig_xor( sig1 VARBIT, sig2 VARBIT ) RETURNS VARBIT AS $$
  DECLARE
    width CONSTANT INT := GREATEST(length(sig1), length(sig2));
    lhs   VARBIT;
    rhs   VARBIT;
  BEGIN
    lhs := @extschema@.sig_resize(sig1, width);
    rhs := @extschema@.sig_resize(sig2, width);
    RETURN bitxor(lhs, rhs);
  END $$ LANGUAGE plpgsql STRICT IMMUTABLE;
99
+
100
-- Number of bits set in a signature.
--
--   sig : bit string to count
--
-- Known hack: the signature is rendered as text, every '0' character is
-- removed, and the length of what remains is the number of '1' bits. The
-- original author found no other way to count bits in a bit string using
-- only PostgreSQL built-in functions.
CREATE FUNCTION @extschema@.count( sig VARBIT ) RETURNS INT AS $$
  DECLARE
    digits TEXT;
  BEGIN
    digits := CAST(sig AS TEXT);
    RETURN length(replace(digits, '0', ''));
  END $$ LANGUAGE plpgsql STRICT IMMUTABLE;
106
+
107
-- Membership test: is the bit at `pos` set in the signature?
--
--   sig : bit string to inspect
--   pos : 0-based bit position
--
-- Built on sig_get, so a position past the end of the signature is simply
-- reported as not contained.
CREATE FUNCTION @extschema@.contains( sig VARBIT, pos INT ) RETURNS BOOL AS $$
  DECLARE
    bit_value INT;
  BEGIN
    bit_value := @extschema@.sig_get(sig, pos);
    RETURN bit_value = 1;
  END $$ LANGUAGE plpgsql STRICT IMMUTABLE;
111
+
112
-- Enumerate the members of a signature: one row per bit that is set, giving
-- its 0-based position, in ascending order.
--
--   sig : bit string to scan
--
-- A signature with no bit set (or of zero length) yields no rows.
CREATE FUNCTION @extschema@.members( sig VARBIT ) RETURNS SETOF INT AS $$
  BEGIN
    -- Single set-based scan. Every generated position lies inside the
    -- signature, so get_bit() can be called directly; going through
    -- contains()/sig_get() would add two nested pl/pgsql calls per bit just
    -- to re-check a bound that always holds here.
    RETURN QUERY
      SELECT idx
        FROM generate_series(0, length(sig) - 1) AS idx
       WHERE get_bit(sig, idx) = 1
       ORDER BY idx;
  END $$ LANGUAGE plpgsql STRICT IMMUTABLE;
120
+
121
+
122
+ -- operators for faceting
123
+
124
-- sig1 & sig2 : intersection of two signatures, implemented by sig_and.
-- The operator is declared as its own commutator.
CREATE OPERATOR @extschema@.& (
  PROCEDURE  = @extschema@.sig_and,
  LEFTARG    = VARBIT,
  RIGHTARG   = VARBIT,
  COMMUTATOR = &
);
130
+
131
-- sig1 | sig2 : union of two signatures, implemented by sig_or.
-- The operator is declared as its own commutator.
CREATE OPERATOR @extschema@.| (
  PROCEDURE  = @extschema@.sig_or,
  LEFTARG    = VARBIT,
  RIGHTARG   = VARBIT,
  COMMUTATOR = |
);
137
+
138
-- sig + pos : add the integer position `pos` to a signature, implemented by
-- the (VARBIT, INT) form of sig_set.
CREATE OPERATOR @extschema@.+ (
  PROCEDURE = @extschema@.sig_set,
  LEFTARG   = VARBIT,
  RIGHTARG  = int
);
143
+
144
+
145
+ -- aggregate functions for faceting
146
+
147
-- collect(sig) : fold a set of signatures into one by OR-ing them together
-- (sig_or is the state transition function; no initial state is declared).
CREATE AGGREGATE @extschema@.collect( VARBIT )
(
  STYPE = VARBIT,
  SFUNC = @extschema@.sig_or
);
152
+
153
-- filter(sig) : fold a set of signatures into one by AND-ing them together
-- (sig_and is the state transition function; no initial state is declared).
CREATE AGGREGATE @extschema@.filter( VARBIT )
(
  STYPE = VARBIT,
  SFUNC = @extschema@.sig_and
);
158
+
159
-- signature(id) : build a signature from a column of integer ids by setting
-- the bit at each id's position (sig_set is the state transition function).
-- Accumulation starts from the one-bit signature '0'.
CREATE AGGREGATE @extschema@.signature( INT )
(
  STYPE    = VARBIT,
  SFUNC    = @extschema@.sig_set,
  INITCOND = '0'
);

-- ============================================================================
165
+ -- These functions are common to all bindings of the Repertoire faceting API
166
+ --
167
+ -- Christopher York
168
+ -- MIT Hyperstudio
169
+ -- February 2014
170
+ -- ============================================================================
171
+
172
+
173
+ -- Aggregator to measure how many bits from a loosely-packed id column would be wasted, if
174
+ -- they were all collected into a bitset signature. Returns a float between 0 (no waste)
175
+ -- and 1.0 (all waste). An example of its use:
176
+ --
177
+ -- SELECT wastage(id) FROM nobelists
178
+ -- =# 0.999999
179
+ --
180
+ -- ALTER TABLE nobelists ADD COLUMN _packed_id SERIAL
181
+ -- SELECT wastage(_packed_id) FROM nobelists
182
+ -- =# 0.015625
183
+ --
184
-- Final function of the wastage aggregate: turn the accumulated state into
-- the proportion of wasted bits.
--
--   state : two-element array as maintained by wastage_accum, where state[1]
--           is the number of values accumulated and state[2] the largest
--           value seen
--
-- Returns 1 - state[1] / (state[2] + 1). A missing state[2] is treated as 0.
-- The "+ 1" is presumably because bit positions are 0-based, so a largest id
-- of N needs N+1 bits.
CREATE FUNCTION @extschema@.wastage_proportion(state INT[]) RETURNS double precision AS $$
  SELECT 1.0 - (
           CAST(state[1] AS double precision)
           / (COALESCE(state[2], 0.0) + 1.0)
         );
$$ LANGUAGE sql;
187
+
188
-- State transition function of the wastage aggregate.
--
--   state : two-element array; state[1] is the number of values accumulated
--           so far, state[2] the largest value seen so far
--   val   : next id value
--
-- Returns the updated state: the counter incremented by one and the maximum
-- raised to `val` when `val` is larger.
--
-- Declared STRICT so that the aggregate skips rows whose id is NULL instead
-- of counting them: a NULL id can never occupy a bit in a signature (the
-- signature aggregate skips such rows as well), so counting it would
-- understate the wastage. IMMUTABLE matches the other functions in this
-- file; the result depends only on the arguments.
CREATE FUNCTION @extschema@.wastage_accum(state INT[], val INT) RETURNS INT[] AS $$
  SELECT ARRAY[ state[1] + 1, GREATEST(state[2], val) ];
$$ LANGUAGE sql STRICT IMMUTABLE;
191
+
192
-- wastage(id) : proportion of bits that would be wasted if the given id
-- column were collected into a signature. The state is an INT[] starting at
-- {0,0}, updated per row by wastage_accum and converted to the final
-- proportion by wastage_proportion.
CREATE AGGREGATE @extschema@.wastage( INT )
(
  STYPE     = INT[],
  INITCOND  = '{0,0}',
  SFUNC     = @extschema@.wastage_accum,
  FINALFUNC = @extschema@.wastage_proportion
);
@@ -0,0 +1,6 @@
1
+ # Faceting PostgreSQL extension module
2
+
3
+ comment = 'API for faceted indexing and queries (based on custom C bitmap type)'
4
+ requires = plpgsql
5
+ default_version = '0.6.0'
6
+ schema = 'facet'