graffiti 2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/COPYING +676 -0
- data/ChangeLog.mtn +233 -0
- data/README.rdoc +129 -0
- data/TODO +30 -0
- data/doc/diagrams/graffiti-classes.svg +157 -0
- data/doc/diagrams/graffiti-deployment.svg +117 -0
- data/doc/diagrams/graffiti-store-sequence.svg +69 -0
- data/doc/diagrams/squish-select-sequence.svg +266 -0
- data/doc/examples/samizdat-rdf-config.yaml +77 -0
- data/doc/examples/samizdat-triggers-pgsql.sql +266 -0
- data/doc/papers/collreif.tex +462 -0
- data/doc/papers/rdf-to-relational-query-translation-icis2009.tex +936 -0
- data/doc/papers/rel-rdf.tex +545 -0
- data/doc/rdf-impl-report.txt +126 -0
- data/graffiti.gemspec +21 -0
- data/lib/graffiti.rb +15 -0
- data/lib/graffiti/debug.rb +34 -0
- data/lib/graffiti/exceptions.rb +20 -0
- data/lib/graffiti/rdf_config.rb +78 -0
- data/lib/graffiti/rdf_property_map.rb +92 -0
- data/lib/graffiti/sql_mapper.rb +916 -0
- data/lib/graffiti/squish.rb +568 -0
- data/lib/graffiti/store.rb +100 -0
- data/setup.rb +1360 -0
- data/test/ts_graffiti.rb +455 -0
- metadata +122 -0
@@ -0,0 +1,77 @@
|
|
1
|
+
---
|
2
|
+
# rdf.yaml
|
3
|
+
#
|
4
|
+
# Defines essential parts of RDF model of a Samizdat site. Don't touch
|
5
|
+
# it unless you know what you're doing.
|
6
|
+
|
7
|
+
# Namespaces
|
8
|
+
#
|
9
|
+
ns:
|
10
|
+
s: 'http://www.nongnu.org/samizdat/rdf/schema#'
|
11
|
+
tag: 'http://www.nongnu.org/samizdat/rdf/tag#'
|
12
|
+
items: 'http://www.nongnu.org/samizdat/rdf/items#'
|
13
|
+
rdf: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
|
14
|
+
dc: 'http://purl.org/dc/elements/1.1/'
|
15
|
+
dct: 'http://purl.org/dc/terms/'
|
16
|
+
ical: 'http://www.w3.org/2002/12/cal#'
|
17
|
+
|
18
|
+
# Mapping of internal RDF properties to tables and fields. Statements
|
19
|
+
# over properties not listed here or in 'subproperty:' section below are
|
20
|
+
# reified using standard rdf::subject, rdf::predicate, and rdf::object
|
21
|
+
# properties, so at least these three and s::id must be mapped.
|
22
|
+
#
|
23
|
+
map:
|
24
|
+
's::id': {resource: id}
|
25
|
+
'dc::date': {resource: published_date}
|
26
|
+
'dct::isPartOf': {resource: part_of}
|
27
|
+
's::isPartOfSubProperty': {resource: part_of_subproperty}
|
28
|
+
's::partSequenceNumber': {resource: part_sequence_number}
|
29
|
+
|
30
|
+
'rdf::subject': {statement: subject}
|
31
|
+
'rdf::predicate': {statement: predicate}
|
32
|
+
'rdf::object': {statement: object}
|
33
|
+
|
34
|
+
's::login': {member: login}
|
35
|
+
's::fullName': {member: full_name}
|
36
|
+
's::email': {member: email}
|
37
|
+
|
38
|
+
'dc::title': {message: title}
|
39
|
+
'dc::creator': {message: creator}
|
40
|
+
'dc::format': {message: format}
|
41
|
+
'dc::language': {message: language}
|
42
|
+
's::openForAll': {message: open}
|
43
|
+
's::hidden': {message: hidden}
|
44
|
+
's::locked': {message: locked}
|
45
|
+
's::content': {message: content}
|
46
|
+
's::htmlFull': {message: html_full}
|
47
|
+
's::htmlShort': {message: html_short}
|
48
|
+
|
49
|
+
's::rating': {statement: rating}
|
50
|
+
|
51
|
+
's::voteProposition': {vote: proposition}
|
52
|
+
's::voteMember': {vote: member}
|
53
|
+
's::voteRating': {vote: rating}
|
54
|
+
|
55
|
+
# Map of properties into lists of their subproperties. For each property
|
56
|
+
# listed here, an additional qualifier field named <field>_subproperty
|
57
|
+
# is defined in the same table (as defined under 'map:' above) referring
|
58
|
+
# to resource id identifying the subproperty (normally a uriref resource
|
59
|
+
# holding uriref of the subproperty). Only one level of subproperty
|
60
|
+
# relation is supported, all subsubproperties must be listed directly
|
61
|
+
# under root property.
|
62
|
+
#
|
63
|
+
subproperties:
|
64
|
+
'dct::isPartOf': [ 's::inReplyTo', 'dct::isVersionOf',
|
65
|
+
's::isTranslationOf', 's::subTagOf' ]
|
66
|
+
|
67
|
+
# Map of transitive RDF properties into tables that hold their
|
68
|
+
# transitive closures. The format of the table is as follows: 'resource'
|
69
|
+
# field refers to the subject resource id, property field (and qualifier
|
70
|
+
# field in case of subproperty) has the same name as in the main table
|
71
|
+
# (as defined under 'map:' above) and holds reference to predicate
|
72
|
+
# object, and 'distance' field holds the distance from subject to object
|
73
|
+
# in the RDF graph.
|
74
|
+
#
|
75
|
+
transitive_closure:
|
76
|
+
'dct::isPartOf': part
|
77
|
+
|
@@ -0,0 +1,266 @@
|
|
1
|
+
-- Samizdat Database Triggers - PostgreSQL
|
2
|
+
--
|
3
|
+
-- Copyright (c) 2002-2011 Dmitry Borodaenko <angdraug@debian.org>
|
4
|
+
--
|
5
|
+
-- This program is free software.
|
6
|
+
-- You can distribute/modify this program under the terms of
|
7
|
+
-- the GNU General Public License version 3 or later.
|
8
|
+
--
|
9
|
+
|
10
|
+
-- Trigger function: guarantees that a row inserted into an internal resource
-- table carries an id. When the caller supplied none, a fresh row is added to
-- resource (labelled with the first trigger argument) and the value most
-- recently obtained from resource_id_seq in this session becomes the id.
--
-- NOTE(review): presumably resource.id takes its default from
-- resource_id_seq -- confirm against the schema definition.
CREATE FUNCTION insert_resource() RETURNS TRIGGER AS $$
BEGIN
    -- an explicitly supplied id is kept untouched
    IF NEW.id IS NOT NULL THEN
        RETURN NEW;
    END IF;

    -- TG_ARGV[0] is the label passed in by the CREATE TRIGGER statement
    INSERT INTO resource (literal, uriref, label)
    VALUES ('false', 'false', TG_ARGV[0]);

    NEW.id := currval('resource_id_seq');

    RETURN NEW;
END;
$$ LANGUAGE 'plpgsql';
|
20
|
+
|
21
|
+
-- Each internal resource table runs insert_resource() before a row is
-- inserted; the trigger argument is the label stored on the resource row.
CREATE TRIGGER insert_statement
    BEFORE INSERT ON statement
    FOR EACH ROW
    EXECUTE PROCEDURE insert_resource('statement');

CREATE TRIGGER insert_member
    BEFORE INSERT ON member
    FOR EACH ROW
    EXECUTE PROCEDURE insert_resource('member');

CREATE TRIGGER insert_message
    BEFORE INSERT ON message
    FOR EACH ROW
    EXECUTE PROCEDURE insert_resource('message');

CREATE TRIGGER insert_vote
    BEFORE INSERT ON vote
    FOR EACH ROW
    EXECUTE PROCEDURE insert_resource('vote');
|
32
|
+
|
33
|
+
-- Trigger function: removes the resource row whose id equals the id of the
-- row that has just been deleted.
CREATE FUNCTION delete_resource() RETURNS TRIGGER AS $$
BEGIN
    DELETE FROM resource
    WHERE resource.id = OLD.id;

    -- the return value of a row-level AFTER trigger is ignored
    RETURN NULL;
END;
$$ LANGUAGE 'plpgsql';
|
39
|
+
|
40
|
+
-- Each internal resource table runs delete_resource() after a row is
-- deleted, so that the matching resource row is removed as well.
CREATE TRIGGER delete_statement
    AFTER DELETE ON statement
    FOR EACH ROW
    EXECUTE PROCEDURE delete_resource();

CREATE TRIGGER delete_member
    AFTER DELETE ON member
    FOR EACH ROW
    EXECUTE PROCEDURE delete_resource();

CREATE TRIGGER delete_message
    AFTER DELETE ON message
    FOR EACH ROW
    EXECUTE PROCEDURE delete_resource();

CREATE TRIGGER delete_vote
    AFTER DELETE ON vote
    FOR EACH ROW
    EXECUTE PROCEDURE delete_resource();
|
51
|
+
|
52
|
+
-- Returns value when a subproperty qualifier is present, NULL otherwise.
--
--   value        resource id to pass through
--   subproperty  qualifier resource id; NULL means "no subproperty"
CREATE FUNCTION select_subproperty(value resource.id%TYPE, subproperty resource.id%TYPE) RETURNS resource.id%TYPE AS $$
BEGIN
    RETURN CASE
        WHEN subproperty IS NULL THEN NULL
        ELSE value
    END;
END;
$$ LANGUAGE 'plpgsql';
|
61
|
+
|
62
|
+
-- Returns the average rating of all votes cast on the given proposition,
-- or NULL when no vote references it (AVG over zero rows).
--
--   statement_id  id of the statement (proposition) being rated
CREATE FUNCTION calculate_statement_rating(statement_id statement.id%TYPE) RETURNS statement.rating%TYPE AS $$
BEGIN
    RETURN (
        SELECT AVG(v.rating)
        FROM vote AS v
        WHERE v.proposition = statement_id
    );
END;
$$ LANGUAGE 'plpgsql';
|
67
|
+
|
68
|
+
-- Refreshes the cached counters kept in the tag table for one tag:
--
--   1. nrelated: number of statements with predicate dc:relation, this tag
--      as object, and a positive rating. The tag row is created when it
--      does not exist yet.
--   2. nrelated_with_subtags: recomputed for this tag and for every tag it
--      is linked to in the part table through s:subTagOf, as the tag's own
--      nrelated plus the nrelated of all tags linked below it.
--
--   tag_id  resource id of the tag whose counters are stale
CREATE FUNCTION update_nrelated(tag_id resource.id%TYPE) RETURNS VOID AS $$
DECLARE
    c_dc_relation resource.label%TYPE := 'http://purl.org/dc/elements/1.1/relation';
    c_subtag_of resource.label%TYPE := 'http://www.nongnu.org/samizdat/rdf/schema#subTagOf';
    v_subtag_of_id resource.id%TYPE;
    v_related_count tag.nrelated%TYPE;
    r_tag RECORD;
BEGIN
    -- step 1: count positively rated dc:relation statements about the tag
    SELECT COUNT(*) INTO v_related_count
    FROM statement AS st
    INNER JOIN resource AS pred ON st.predicate = pred.id
    WHERE pred.label = c_dc_relation
        AND st.object = tag_id
        AND st.rating > 0;

    -- update the existing tag row, or create it on first use
    UPDATE tag
    SET nrelated = v_related_count
    WHERE id = tag_id;

    IF NOT FOUND THEN
        INSERT INTO tag (id, nrelated)
        VALUES (tag_id, v_related_count);
    END IF;

    -- step 2: resolve the resource id that stands for s:subTagOf
    SELECT r.id INTO v_subtag_of_id
    FROM resource AS r
    WHERE r.label = c_subtag_of;

    -- visit the tag itself (distance 0) and then its supertags, nearest first
    FOR r_tag IN (
        SELECT tag_id AS id, 0 AS distance
        UNION
        SELECT part_of AS id, distance
        FROM part
        WHERE part.id = tag_id
            AND part.part_of_subproperty = v_subtag_of_id
        ORDER BY distance ASC)
    LOOP
        -- COALESCE covers tags without any subtag (SUM over zero rows is NULL)
        UPDATE tag
        SET nrelated_with_subtags = nrelated + COALESCE((
                SELECT SUM(subt.nrelated)
                FROM part AS p
                INNER JOIN tag AS subt ON subt.id = p.id
                WHERE p.part_of = r_tag.id
                    AND p.part_of_subproperty = v_subtag_of_id), 0)
        WHERE id = r_tag.id;
    END LOOP;
END;
$$ LANGUAGE 'plpgsql';
|
110
|
+
|
111
|
+
-- Calls update_nrelated(tag_id) only when the given property is the
-- resource that represents s:subTagOf; does nothing otherwise (including
-- when property is NULL or no such resource exists).
--
--   tag_id    resource id of the tag whose counters may need a refresh
--   property  resource id of the (sub)property of the affected link
CREATE FUNCTION update_nrelated_if_subtag(tag_id resource.id%TYPE, property resource.id%TYPE) RETURNS VOID AS $$
DECLARE
    c_subtag_of resource.label%TYPE := 'http://www.nongnu.org/samizdat/rdf/schema#subTagOf';
    v_subtag_of_id resource.id%TYPE;
BEGIN
    SELECT r.id INTO v_subtag_of_id
    FROM resource AS r
    WHERE r.label = c_subtag_of;

    IF property = v_subtag_of_id THEN
        PERFORM update_nrelated(tag_id);
    END IF;
END;
$$ LANGUAGE 'plpgsql';
|
124
|
+
|
125
|
+
-- Trigger function for row-level INSERT, UPDATE and DELETE on vote.
--
-- Recalculates the rating of the proposition the vote belongs to and, when
-- the proposition is a dc:relation statement whose rating has crossed the
-- "positive" threshold in either direction, refreshes the counters of the
-- related tag through update_nrelated().
--
-- The affected proposition is taken from OLD on DELETE and from NEW
-- otherwise: NEW carries no row in a DELETE trigger, so reading
-- NEW.proposition there would leave the rating of the proposition stale.
--
-- NOTE(review): an UPDATE that moves a vote to a different proposition only
-- refreshes the new proposition; the old one keeps its previous rating.
CREATE FUNCTION update_rating() RETURNS TRIGGER AS $$
DECLARE
    dc_relation resource.label%TYPE := 'http://purl.org/dc/elements/1.1/relation';
    affected_proposition vote.proposition%TYPE;
    old_rating statement.rating%TYPE;
    new_rating statement.rating%TYPE;
    tag_id resource.id%TYPE;
    predicate_uriref resource.label%TYPE;
BEGIN
    -- pick the row variable that is actually populated for this operation
    IF TG_OP = 'DELETE' THEN
        affected_proposition := OLD.proposition;
    ELSE
        affected_proposition := NEW.proposition;
    END IF;

    -- save some values for later reference
    SELECT s.rating, s.object, p.label
    INTO old_rating, tag_id, predicate_uriref
    FROM statement s
    INNER JOIN resource p ON s.predicate = p.id
    WHERE s.id = affected_proposition;

    -- set new rating of the proposition
    new_rating := calculate_statement_rating(affected_proposition);
    UPDATE statement SET rating = new_rating WHERE id = affected_proposition;

    -- check if new rating reverts truth value of the proposition; a NULL
    -- rating (no votes left) counts as "not positive" on either side, so
    -- removing the last vote of a positively rated relation is noticed too
    IF predicate_uriref = dc_relation
        AND COALESCE(old_rating > 0, FALSE) <> COALESCE(new_rating > 0, FALSE)
    THEN
        PERFORM update_nrelated(tag_id);
    END IF;

    -- the return value of a row-level AFTER trigger is ignored; return the
    -- row that exists for the current operation
    IF TG_OP = 'DELETE' THEN
        RETURN OLD;
    END IF;

    RETURN NEW;
END;
$$ LANGUAGE 'plpgsql';
|
155
|
+
|
156
|
+
-- Keep statement ratings in step with every change to the vote table.
CREATE TRIGGER update_rating
    AFTER INSERT OR UPDATE OR DELETE ON vote
    FOR EACH ROW
    EXECUTE PROCEDURE update_rating();
|
158
|
+
|
159
|
+
-- Trigger function run before a resource row is inserted or updated.
--
-- Rejects part_of links that would create a loop: a resource that is part
-- of itself, or part of a resource already recorded in the part table as
-- one of its own parts. A rejected link is cleared (part_of and
-- part_of_subproperty set to NULL) instead of failing the statement.
CREATE FUNCTION before_update_part() RETURNS TRIGGER AS $$
BEGIN
    IF TG_OP = 'INSERT' THEN
        -- a new row without a part_of link has nothing to validate
        IF NEW.part_of IS NULL THEN
            RETURN NEW;
        END IF;
    ELSIF TG_OP = 'UPDATE' THEN
        -- part_of is unchanged, do nothing
        IF (NEW.part_of IS NULL AND OLD.part_of IS NULL) OR
            ((NEW.part_of = OLD.part_of) AND (NEW.part_of_subproperty = OLD.part_of_subproperty))
        THEN
            RETURN NEW;
        END IF;
    END IF;

    -- check for loops
    IF NEW.part_of = NEW.id OR EXISTS (
            SELECT 1
            FROM part AS descendant
            WHERE descendant.part_of = NEW.id
                AND descendant.id = NEW.part_of)
    THEN
        -- unset part_of, but don't fail whole query
        NEW.part_of := NULL;
        NEW.part_of_subproperty := NULL;

        -- OLD only exists outside INSERT; check it was a subtag link
        IF TG_OP <> 'INSERT' THEN
            PERFORM update_nrelated_if_subtag(OLD.id, OLD.part_of_subproperty);
        END IF;
    END IF;

    RETURN NEW;
END;
$$ LANGUAGE 'plpgsql';
|
193
|
+
|
194
|
+
-- Validate part_of links before a resource row is written.
CREATE TRIGGER before_update_part
    BEFORE INSERT OR UPDATE ON resource
    FOR EACH ROW
    EXECUTE PROCEDURE before_update_part();
|
196
|
+
|
197
|
+
-- Trigger function run after a resource row is inserted, updated or deleted.
--
-- Maintains the part table, which holds the transitive closure of part_of
-- links: rows generated for the old part_of value are removed, rows for the
-- new part_of value are generated (for the resource itself and for all of
-- its recorded parts), and tag counters are refreshed when the affected
-- link was a subtag link.
--
-- NOTE(review): part_of_subproperty is compared with "=" throughout, so
-- links whose subproperty is NULL never match these predicates -- confirm
-- that this is intended.
CREATE FUNCTION after_update_part() RETURNS TRIGGER AS $$
BEGIN
    IF TG_OP = 'INSERT' THEN
        -- a new row without a part_of link generates no closure rows
        IF NEW.part_of IS NULL THEN
            RETURN NEW;
        END IF;
    ELSIF TG_OP = 'UPDATE' THEN
        -- part_of is unchanged, do nothing
        IF (NEW.part_of IS NULL AND OLD.part_of IS NULL) OR
            ((NEW.part_of = OLD.part_of) AND (NEW.part_of_subproperty = OLD.part_of_subproperty))
        THEN
            RETURN NEW;
        END IF;
    END IF;

    -- OLD only exists for UPDATE and DELETE
    IF TG_OP <> 'INSERT' THEN
        IF OLD.part_of IS NOT NULL THEN
            -- clean up links generated for old part_of: for the old
            -- resource and all its parts, remove links to all parents of
            -- the old resource
            DELETE FROM part
            WHERE id IN (
                    SELECT OLD.id
                    UNION
                    SELECT descendant.id
                    FROM part AS descendant
                    WHERE descendant.part_of = OLD.id)
                AND part_of IN (
                    SELECT ancestor.part_of
                    FROM part AS ancestor
                    WHERE ancestor.id = OLD.id)
                AND part_of_subproperty = OLD.part_of_subproperty;
        END IF;
    END IF;

    -- NEW only exists for INSERT and UPDATE
    IF TG_OP <> 'DELETE' THEN
        IF NEW.part_of IS NOT NULL THEN
            -- generate links to the parent and grand-parents of new resource
            INSERT INTO part (id, part_of, part_of_subproperty, distance)
            SELECT NEW.id, NEW.part_of, NEW.part_of_subproperty, 1
            UNION
            SELECT NEW.id, ancestor.part_of, NEW.part_of_subproperty, ancestor.distance + 1
            FROM part AS ancestor
            WHERE ancestor.id = NEW.part_of
                AND ancestor.part_of_subproperty = NEW.part_of_subproperty;

            -- generate links from all parts of new resource to all its parents
            INSERT INTO part (id, part_of, part_of_subproperty, distance)
            SELECT
                child.id,
                parent.part_of,
                NEW.part_of_subproperty,
                child.distance + parent.distance
            FROM part AS child
            INNER JOIN part AS parent
                ON parent.id = NEW.id
                AND parent.part_of_subproperty = NEW.part_of_subproperty
            WHERE child.part_of = NEW.id
                AND child.part_of_subproperty = NEW.part_of_subproperty;
        END IF;
    END IF;

    -- check if subtag link was affected, first for the new link, then for
    -- the old one
    IF TG_OP <> 'DELETE' THEN
        PERFORM update_nrelated_if_subtag(NEW.id, NEW.part_of_subproperty);
    END IF;
    IF TG_OP <> 'INSERT' THEN
        PERFORM update_nrelated_if_subtag(OLD.id, OLD.part_of_subproperty);
    END IF;

    RETURN NEW;
END;
$$ LANGUAGE 'plpgsql';
|
264
|
+
|
265
|
+
-- Rebuild transitive part_of links after any change to a resource row.
CREATE TRIGGER after_update_part
    AFTER INSERT OR UPDATE OR DELETE ON resource
    FOR EACH ROW
    EXECUTE PROCEDURE after_update_part();
|
@@ -0,0 +1,462 @@
|
|
1
|
+
\documentclass{llncs}
|
2
|
+
\usepackage{makeidx} % allows for indexgeneration
|
3
|
+
\usepackage[pdfpagescrop={92 112 523 778},a4paper=false,
|
4
|
+
pdfborder={0 0 0}]{hyperref}
|
5
|
+
\emergencystretch=8pt
|
6
|
+
%
|
7
|
+
\begin{document}
|
8
|
+
\mainmatter % start of the contributions
|
9
|
+
%
|
10
|
+
\title{Model for Collaborative Decision Making Based on RDF Reification}
|
11
|
+
\toctitle{Model for Collaborative Decision Making Based on RDF Reification}
|
12
|
+
\titlerunning{Collaboration and RDF Reification}
|
13
|
+
%
|
14
|
+
\author{Dmitry Borodaenko}
|
15
|
+
\authorrunning{Dmitry Borodaenko} % abbreviated author list (for running head)
|
16
|
+
%%%% modified list of authors for the TOC (add the affiliations)
|
17
|
+
\tocauthor{Dmitry Borodaenko}
|
18
|
+
%
|
19
|
+
\institute{\email{angdraug@debian.org}}
|
20
|
+
|
21
|
+
\maketitle % typeset the title of the contribution
|
22
|
+
|
23
|
+
\begin{abstract}
|
24
|
+
This paper presents a novel approach to online collaboration on the Web,
|
25
|
+
intended as technical means to make collective decisions in situations when
|
26
|
+
consensus fails. It is proposed that participants of the process are allowed
|
27
|
+
to create statements about site resources and, by the means of RDF
|
28
|
+
reification, to assert personal approval of such statements. Arbitrary
|
29
|
+
algorithms may then be used to determine validity of a statement in a given
|
30
|
+
context from the set of approval statements by different participants. The
|
31
|
+
paper goes on to discuss applicability of the proposed approach in the areas
|
32
|
+
of open-source development and independent media, and describes its
|
33
|
+
implementation in the Samizdat open publishing and collaboration system.
|
34
|
+
\end{abstract}
|
35
|
+
|
36
|
+
|
37
|
+
\section{Introduction}
|
38
|
+
|
39
|
+
Extensive growth of Internet over the last decades introduced a new form of
|
40
|
+
human collaboration: online communities. Availability of cheap digital
|
41
|
+
communication media has made it possible to form large distributed projects,
|
42
|
+
bringing together participants who would be otherwise unable to cooperate.
|
43
|
+
|
44
|
+
As more and more projects go online and spread across the globe, it becomes
|
45
|
+
apparent that new opportunities in remote cooperation also bring forth new
|
46
|
+
challenges. As observed by Steven Talbott\cite{fdnc}, technological means do not
|
47
|
+
provide a full substitute for real person-to-person relations, ``technology
|
48
|
+
is not a community''. A well-known example of this is the fact that it is
|
49
|
+
vital for an online community to augment indirect and impersonal digital
|
50
|
+
communications with live meetings. However, even regular live meetings do not
|
51
|
+
solve all of the remote cooperation problems as they are limited in time and
|
52
|
+
scope, and thus can't happen often enough nor include all of the interested
|
53
|
+
parties into communication. In particular, one of the problems of online
|
54
|
+
communities that is begging for a new and better technical solution is
|
55
|
+
decision making and dispute resolution.
|
56
|
+
|
57
|
+
While it is most common that online communities are formed by volunteers,
|
58
|
+
their forms of governance are not necessarily democratic and vary widely, from
|
59
|
+
primitive single-person leadership and meritocracy in less formal technical
|
60
|
+
projects to consensus and majority voting in more complicated situations.
|
61
|
+
|
62
|
+
Usually, decision making in online volunteer projects is carried out via
|
63
|
+
traditional communication means, such as IRC channels, mailing lists,
|
64
|
+
newsgroups, etc., with rare exceptions such as the Debian project which
|
65
|
+
employs its own Devotee voting system based on PGP authentication and Condorcet
|
66
|
+
vote counting\cite{debian-constitution}, and the Wikipedia project which
|
67
|
+
relies on a Wiki collaborative publishing system and enforces consensus among
|
68
|
+
its contributors. The scale and the level of quality achieved by the latter
|
69
|
+
two projects demonstrate that a formalized collaboration process is as
|
70
|
+
important for volunteer projects as elsewhere: while sufficient to determine
|
71
|
+
rough consensus, traditional communications require participants to come up
|
72
|
+
with informal means of dispute resolution, making the whole process overly
|
73
|
+
dependent on interpersonal attitudes and communicative skills within group.
|
74
|
+
|
75
|
+
It is not to say that Debian or Wikipedia processes are perfect and need not
|
76
|
+
be improved. The strict consensus required by the Wikipedia Editors Policy
|
77
|
+
discourages dissenting minority from participation, while full-scale voting
|
78
|
+
system like Debian Devotee can't be used for every minor day-to-day decision
|
79
|
+
because of the high overhead involved and the limits imposed by the ballot
|
80
|
+
form.
|
81
|
+
|
82
|
+
This paper describes how RDF statement approval based on reification can be
|
83
|
+
applied to the problem of online decision making in diverse and politically
|
84
|
+
intensive distributed projects, and proposes a generic semantic model which
|
85
|
+
can be used in a wide range of applications involving online collaboration.
|
86
|
+
The proposed model is implemented in the Samizdat open-publishing and
|
87
|
+
collaboration engine, described later in the paper.
|
88
|
+
|
89
|
+
|
90
|
+
\section{Collaboration Model}
|
91
|
+
|
92
|
+
The collaboration model implemented by Samizdat revolves around the concept of
|
93
|
+
\emph{open editing}\cite{opened}, which includes the processes of publishing,
|
94
|
+
structuring, and filtering online content. ``Open'' part of open editing
|
95
|
+
implies that the collaboration process is visible to all participants, and
|
96
|
+
roles of readers and editors are available equally to everyone.
|
97
|
+
\emph{Publishing} involves posting new documents, comments, and revised
|
98
|
+
documents. \emph{Structuring} involves categorization and appraisal of
|
99
|
+
publications and other actions of fellow participants. \emph{Filtering}
|
100
|
+
process is intended to reduce information flow to a comprehensible level by
|
101
|
+
presenting a user with resources of highest quality and relevance. Each of
|
102
|
+
these processes requires a fair amount of decision making to be done, which
|
103
|
+
means that its effectiveness can be greatly improved by automating some
|
104
|
+
aspects of the decision making procedure.
|
105
|
+
|
106
|
+
|
107
|
+
\section{Collective Statement Approval}
|
108
|
+
%
|
109
|
+
\subsection{Focus-Centered Site Structure}
|
110
|
+
|
111
|
+
In the proposed collaboration model, RDF statements are used as a generic
|
112
|
+
mechanism for structuring site content. While it is possible to make any kinds
|
113
|
+
of statements about site resources, the most important kind of statement is
|
114
|
+
the one that relates a resource to a so-called ``focus''\cite{concepts}.
|
115
|
+
\emph{Focus} is a kind of resource that, when related by an RDF statement to
|
116
|
+
other resources, allows to group similar resources together and to evaluate
|
117
|
+
resources against different criteria. In some sense, all activities of project
|
118
|
+
members are represented as relations between resources and focuses.
|
119
|
+
|
120
|
+
Dynamically grouping resources around different focuses allows project members
|
121
|
+
to concentrate on the resources that are most relevant to their area of
|
122
|
+
interests and provide best quality. Use of RDF for site structure description
|
123
|
+
makes it possible to store and exchange filters for site resource selection in
|
124
|
+
the form of RDF queries, thus allowing participants to share their preferences
|
125
|
+
and ensuring interoperability with RDF-aware agents.
|
126
|
+
|
127
|
+
Since any resource can be used as a focus, it is possible that project members
|
128
|
+
define their own focuses, and relate focuses one to another. In a sufficiently
|
129
|
+
large and intensive project, this feature should help site structure to evolve
|
130
|
+
in accordance with usage patterns of different groups of users.
|
131
|
+
|
132
|
+
\subsection{RDF Reification}
|
133
|
+
|
134
|
+
RDF reification provides a mechanism for describing RDF statements. As defined
|
135
|
+
in ``RDF Semantics''\cite{rdf-mt}, assertion of reification of RDF statement
|
136
|
+
means that a document exists containing a triple token instantiating the
|
137
|
+
statement. The reified triple is a resource which can be described in the same
|
138
|
+
way as any other resource. It is important to note that there can be several
|
139
|
+
triple tokens with the same subject, object, and predicate, and, according to
|
140
|
+
RDF reification semantics, such tokens should be treated as separate
|
141
|
+
resources, possibly with different composition or provenance information
|
142
|
+
attached to each.
|
143
|
+
|
144
|
+
\subsection{Proposition and Vote}
|
145
|
+
|
146
|
+
In the proposed model, all statements are reified, and may be voted upon by
|
147
|
+
project members. To distinguish statements with attached votes, they are
|
148
|
+
called ``propositions''. \emph{Proposition} is a subclass of RDF statement
|
149
|
+
which can be approved or disapproved by votes of project members. Accordingly,
|
150
|
+
\emph{vote} is a record of vote cast in favor or against particular
|
151
|
+
proposition by particular member, and \emph{rating} is a denotation of
|
152
|
+
approval of the proposition as determined from individual votes.
|
153
|
+
|
154
|
+
Exact mechanism of rating calculation can be determined by each site, or even
|
155
|
+
each user, individually, according to average value of votes cast, level of
|
156
|
+
trust existing between the user and particular voters, absolute number of
|
157
|
+
votes cast, etc. Since individual votes are recorded in RDF and are available
|
158
|
+
for later extraction, rating can be calculated at any time using any formula
|
159
|
+
that suits the end user best. Some users may choose to share their view of the
|
160
|
+
site resources, and publish their filters in the form of RDF queries.
|
161
|
+
|
162
|
+
Default rating system in Samizdat lets voter select from ratings ``$-2$''
|
163
|
+
(no), ``$-1$'' (not likely), ``$0$'' (uncertain), ``$1$'' (likely), ``$2$''
|
164
|
+
(yes). Total rating of proposition is equal to the average value of all votes
|
165
|
+
cast for the proposition; resources with rating below ``$-1$'' are hidden from
|
166
|
+
view.
|
167
|
+
|
168
|
+
|
169
|
+
\section{Target Applications and Use Cases}
|
170
|
+
%
|
171
|
+
\subsection{Open Publishing}
|
172
|
+
|
173
|
+
While it is vital for any project to come up with fair and predictable methods
|
174
|
+
of decision making, it's hard to find a more typical example than the
|
175
|
+
Indymedia network, international open publishing project with the aim of
|
176
|
+
providing the public with unbiased news source\cite{openpub}. Since the main
|
177
|
+
focus of Indymedia is politics, and since it is explicitly open for everyone,
|
178
|
+
independent media centers are used by people from all parts of political
|
179
|
+
spectrum, and often become a place of heated debate, or even target of flood
|
180
|
+
attacks.
|
181
|
+
|
182
|
+
This conflict between fairness and political bias, as well as sheer amount of
|
183
|
+
information flowing through the news network, creates a need for a more
|
184
|
+
flexible categorization and filtering system that would take the burden and
|
185
|
+
responsibility of moderation off from site administrators. The issue of
|
186
|
+
developing an open editing system was raised by Indymedia project participants
|
187
|
+
in January 2002, but, to date, implementations of this concept are not ready
|
188
|
+
for production use. The Active2 project\cite{active2} which has set forth to
|
189
|
+
fulfil that role is still in the alpha stage of the development, and, unlike
|
190
|
+
Samizdat, limits its use of RDF to describing its resources with Dublin Core
|
191
|
+
meta-data.
|
192
|
+
|
193
|
+
Implementation of an open editing system was one of the initial goals of the
|
194
|
+
Samizdat project\cite{oscom3}, and deployment of the Samizdat engine by an
|
195
|
+
independent media center would become a deciding trial of vitality of the
|
196
|
+
proposed collaboration model in a real-world environment.
|
197
|
+
|
198
|
+
\subsection{Documentation Development}
|
199
|
+
|
200
|
+
Complexity level of modern computer systems makes it impossible to develop and
|
201
|
+
operate them without extensive user and developer manuals which document
|
202
|
+
intended behaviour of a system and describe solutions to typical user
|
203
|
+
problems. Ultimately, such manuals reflect collective knowledge about a
|
204
|
+
system, and may require input from many different people with different
|
205
|
+
perspectives. On the other hand, in order to be useful to different people,
|
206
|
+
documentation should be well-structured and easy to navigate.
|
207
|
+
|
208
|
+
The most popular solution for collaborative documentation development to date
|
209
|
+
is \emph{Wiki}, a combination of very simple hypertext markup and ability to
|
210
|
+
edit documents within an HTML form. Such simplicity makes Wiki easy to use,
|
211
|
+
but at the same time limits its applicability to large bodies of
|
212
|
+
documentation. Due to being limited to basic hypertext without categorization
|
213
|
+
and filtering capabilities, Wiki sites require huge amount of manual editing
|
214
|
+
done by trusted maintainers in order to keep the site structure from falling
|
215
|
+
behind a growing amount of available information, and to protect it from
|
216
|
+
vandals. Although there are successful examples of large Wiki sites (most
|
217
|
+
prominent being the Wikipedia project), Wiki does not provide sufficient
|
218
|
+
infrastructure for development and maintenance of complex technical
|
219
|
+
documentation.
|
220
|
+
|
221
|
+
Combination of the Wiki approach with RDF metadata, along with implementation
|
222
|
+
of the proposed collaborative decision making model for determination of
|
223
|
+
documentation structure, would allow to make significant progress in the
|
224
|
+
adoption of the open-source software which is often suffering from a lack of
|
225
|
+
comprehensive and up-to-date documentation.
|
226
|
+
|
227
|
+
\subsection{Bug Tracking}
|
228
|
+
|
229
|
+
Bug-tracking tools have grown to become essential component of any software
|
230
|
+
development process. However, despite wide adoption, bug-tracking software has
|
231
|
+
not yet reached maturity: interoperability between different tools is missing;
|
232
|
+
incompatible issue classifications and work flows complicate status
|
233
|
+
synchronization between companies collaborating on a single project; lack of
|
234
|
+
integration with time-management, document management, version control and
|
235
|
+
other kinds of applications increases amount of routine work done by project
|
236
|
+
manager.
|
237
|
+
|
238
|
+
On the other hand, development of integrated project management systems shows
|
239
|
+
that the most important problem in project management automation is
|
240
|
+
convergence of information from all sources in a single focal point. For such
|
241
|
+
convergence to become possible, unified process flow model, based on open
|
242
|
+
standards such as RDF, should be adopted across all information sources, from
|
243
|
+
source code version control to developer forums. Since strict provenance
|
244
|
+
tracking is a key requirement for such model, the proposed reification-based
|
245
|
+
approach may be employed to satisfy it.
|
246
|
+
|
247
|
+
|
248
|
+
\section{Samizdat Engine}
|
249
|
+
%
|
250
|
+
\subsection{Project Status}
|
251
|
+
|
252
|
+
Samizdat engine is implemented in the Ruby programming language and relies on
|
253
|
+
the PostgreSQL database management system for RDF storage. Other programs
|
254
|
+
required for Samizdat deployment are Ruby/Postgres, Ruby/DBI, and YAML4R
|
255
|
+
libraries for Ruby, and Apache web server with mod\_ruby module. Samizdat is
|
256
|
+
free software and does not require any non-free software to
|
257
|
+
run\cite{impl-report}.
|
258
|
+
|
259
|
+
Samizdat project development started in December 2002, first public release
|
260
|
+
was announced in June 2003. As of the second beta version 0.5.1, released in
|
261
|
+
March 2004, Samizdat provided basic set of open publishing functionality,
|
262
|
+
including registering site members, publishing and replying to messages,
|
263
|
+
uploading multimedia messages, voting on relation of site focuses to
|
264
|
+
resources, creating and managing new focuses, hand-editing or using GUI for
|
265
|
+
constructing and publishing Squish queries that can be used to search and
|
266
|
+
filter site resources. Next major release 0.6.0 is expected to add
|
267
|
+
collaborative documentation development functionality.
|
268
|
+
|
269
|
+
\subsection{Samizdat Schema}
|
270
|
+
|
271
|
+
Core representation of Samizdat content is RDF. Any new resource published on
|
272
|
+
Samizdat site is automatically assigned a unique numeric ID, which, when
|
273
|
+
appended to the base site URL, forms resource URIref. This ID may be accessed
|
274
|
+
via {\tt id} property. Publication time stamp is recorded in {\tt dc:date}
|
275
|
+
property (here and below, ``{\tt dc:}'' prefix refers to the Dublin Core
|
276
|
+
namespace):
|
277
|
+
|
278
|
+
\begin{verbatim}
|
279
|
+
:id
|
280
|
+
rdfs:domain rdfs:Resource .
|
281
|
+
|
282
|
+
dc:date
|
283
|
+
rdfs:domain rdfs:Resource .
|
284
|
+
\end{verbatim}
|
285
|
+
|
286
|
+
{\tt Member} is a registered user of a Samizdat site (synonyms: poster,
|
287
|
+
visitor, reader, author, creator). Members can post messages, create focuses,
|
288
|
+
relate messages to focuses, vote on relations, view messages, use and publish
|
289
|
+
filters based on relations between messages and focuses.
|
290
|
+
|
291
|
+
\begin{verbatim}
|
292
|
+
:Member
|
293
|
+
rdfs:subClassOf rdfs:Resource .
|
294
|
+
|
295
|
+
:login
|
296
|
+
rdfs:domain :Member ;
|
297
|
+
rdfs:range rdfs:Literal .
|
298
|
+
\end{verbatim}
|
299
|
+
|
300
|
+
Resources are related to focuses with {\tt dc:relation} property:
|
301
|
+
|
302
|
+
\begin{verbatim}
|
303
|
+
:Focus
|
304
|
+
rdfs:subClassOf rdfs:Resource .
|
305
|
+
|
306
|
+
dc:relation
|
307
|
+
rdfs:domain rdfs:Resource ;
|
308
|
+
rdfs:range :Focus .
|
309
|
+
\end{verbatim}
|
310
|
+
|
311
|
+
{\tt Proposition} is an RDF statement with {\tt rating} property. Value of
|
312
|
+
{\tt rating} is calculated from {\tt voteRating} values of individual {\tt
|
313
|
+
Vote} resources attached to this proposition via {\tt voteProposition}
|
314
|
+
property:
|
315
|
+
|
316
|
+
\begin{verbatim}
|
317
|
+
:Proposition
|
318
|
+
rdfs:subClassOf rdf:Statement .
|
319
|
+
|
320
|
+
:rating
|
321
|
+
rdfs:domain :Proposition ;
|
322
|
+
rdfs:range rdfs:Literal .
|
323
|
+
|
324
|
+
:Vote
|
325
|
+
rdfs:subClassOf rdfs:Resource .
|
326
|
+
|
327
|
+
:voteProposition
|
328
|
+
rdfs:domain :Vote ;
|
329
|
+
rdfs:range :Proposition .
|
330
|
+
|
331
|
+
:voteMember
|
332
|
+
rdfs:domain :Vote ;
|
333
|
+
rdfs:range :Member .
|
334
|
+
|
335
|
+
:voteRating
|
336
|
+
rdfs:domain :Vote ;
|
337
|
+
rdfs:range rdfs:Literal .
|
338
|
+
\end{verbatim}
|
339
|
+
|
340
|
+
Parts of Samizdat schema that are not relevant to the discussed collective
|
341
|
+
decision making model, such as discussion threads, version control, and
|
342
|
+
aggregate messages, were omitted. Full Samizdat schema in N3 notation can be
|
343
|
+
found in Samizdat source code package.
|
344
|
+
|
345
|
+
\subsection{RDF Storage Implementation}
|
346
|
+
|
347
|
+
To address scalability concerns, Samizdat extends traditional relational
|
348
|
+
representation of RDF as a table of \{subject, object, predicate\} triples
|
349
|
+
with a unique RDF-to-relational query translation technology. Most highly used
|
350
|
+
RDF properties of Samizdat schema are mapped into fields of \emph{internal
|
351
|
+
resource tables} corresponding to resource classes, with id of the record
|
352
|
+
referencing to the {\tt Resource} table; all other properties are recorded as
|
353
|
+
triples in the {\tt Statement} table. Detailed explanation of the
|
354
|
+
RDF-to-relational mapping can be found in ``Samizdat RDF
|
355
|
+
Storage''\cite{rdf-storage} document.
|
356
|
+
|
357
|
+
To demonstrate usage of the Samizdat RDF schema described earlier in this
|
358
|
+
section, the excerpt of Ruby code responsible for individual vote rating
|
359
|
+
assignment is quoted below.
|
360
|
+
|
361
|
+
\begin{verbatim}
|
362
|
+
def rating=(value)
|
363
|
+
value = Focus.validate_rating(value)
|
364
|
+
if value then
|
365
|
+
rdf.assert %{
|
366
|
+
UPDATE ?rating = '#{value}'
|
367
|
+
WHERE (rdf::subject ?stmt #{resource.id})
|
368
|
+
(rdf::predicate ?stmt dc::relation)
|
369
|
+
(rdf::object ?stmt #{@id})
|
370
|
+
(s::voteProposition ?vote ?stmt)
|
371
|
+
(s::voteMember ?vote #{session.id})
|
372
|
+
(s::voteRating ?vote ?rating)
|
373
|
+
USING PRESET NS}
|
374
|
+
@rating = nil # invalidate rating cache
|
375
|
+
end
|
376
|
+
end
|
377
|
+
\end{verbatim}
|
378
|
+
|
379
|
+
In this attribute assignment method of the {\tt Focus} class, the RDF assertion is
|
380
|
+
recorded in extended Squish syntax and populated with variables storing the
|
381
|
+
rating {\tt value}, resource identifier {\tt resource.id}, focus identifier
|
382
|
+
{\tt @id}, and identifier of registered member {\tt session.id}. When the
|
383
|
+
Samizdat RDF storage layer updates {\tt Vote.voteRating}, average value of
|
384
|
+
corresponding {\tt Proposition.rating} is recalculated by a stored procedure.
|
385
|
+
|
386
|
+
|
387
|
+
\section{Conclusions}
|
388
|
+
|
389
|
+
Initially started as an RDF-based open-publishing engine, the Samizdat project
|
390
|
+
opens a new approach to online collaboration in general. The proposed model of
|
391
|
+
collective statement approval via RDF reification is applicable in a large
|
392
|
+
range of problem domains, including documentation development and bug
|
393
|
+
tracking.
|
394
|
+
|
395
|
+
Implementation of the proposed model in the Samizdat engine proves the viability
|
396
|
+
of RDF not only as a metadata interchange format, but also as a data model
|
397
|
+
that may be employed by software architects in innovative ways. The key role
|
398
|
+
played by RDF reification in the described model shows that this comparatively
|
399
|
+
obscure part of the RDF standard deserves broader mindshare among Semantic Web
|
400
|
+
developers.
|
401
|
+
|
402
|
+
|
403
|
+
% ---- Bibliography ----
|
404
|
+
%
|
405
|
+
\begin{thebibliography}{19}
|
406
|
+
%
|
407
|
+
\bibitem {openpub}
|
408
|
+
Arnison, Matthew:
|
409
|
+
Open publishing is the same as free software, 2002\\
|
410
|
+
http://www.cat.org.au/maffew/cat/openpub.html
|
411
|
+
|
412
|
+
\bibitem {concepts}
|
413
|
+
Borodaenko, Dmitry:
|
414
|
+
Samizdat Concepts, December 2002\\
|
415
|
+
http://savannah.nongnu.org/cgi-bin/viewcvs/samizdat/samizdat/doc/\\
|
416
|
+
concepts.txt
|
417
|
+
|
418
|
+
\bibitem {rdf-storage}
|
419
|
+
Borodaenko, Dmitry:
|
420
|
+
Samizdat RDF Storage, December 2002\\
|
421
|
+
http://savannah.nongnu.org/cgi-bin/viewcvs/samizdat/samizdat/doc/\\
|
422
|
+
rdf-storage.txt
|
423
|
+
|
424
|
+
\bibitem {oscom3}
|
425
|
+
Borodaenko, Dmitry:
|
426
|
+
Samizdat --- RDF model for an open publishing and cooperation engine. Third
|
427
|
+
International OSCOM Conference, Berkman Center for Internet and Society,
|
428
|
+
Harvard Law School, May 2003\\
|
429
|
+
http://slideml.bitflux.ch/files/slidesets/503/title.html
|
430
|
+
|
431
|
+
\bibitem {impl-report}
|
432
|
+
Borodaenko, Dmitry:
|
433
|
+
Samizdat RDF Implementation Report, September 2003\\
|
434
|
+
http://lists.w3.org/Archives/Public/www-rdf-interest/2003Sep/0043.html
|
435
|
+
|
436
|
+
\bibitem {debian-constitution}
|
437
|
+
Debian Constitution. Debian Project, 1999\\
|
438
|
+
http://www.debian.org/devel/constitution
|
439
|
+
|
440
|
+
\bibitem {rdf-mt}
|
441
|
+
Hayes, Patrick:
|
442
|
+
RDF Semantics. W3C, February 2004\\
|
443
|
+
http://www.w3.org/TR/rdf-mt
|
444
|
+
|
445
|
+
\bibitem {opened}
|
446
|
+
Jay, Dru:
|
447
|
+
Three Proposals for Open Publishing --- Towards a transparent, collaborative
|
448
|
+
editorial framework, 2002\\
|
449
|
+
http://dru.ca/imc/open\_pub.html
|
450
|
+
|
451
|
+
\bibitem {fdnc}
|
452
|
+
Talbott, Stephen L.:
|
453
|
+
The Future Does Not Compute. O'Reilly \& Associates, 1995\\
|
454
|
+
http://www.oreilly.com/\homedir{}stevet/fdnc/
|
455
|
+
|
456
|
+
\bibitem {active2}
|
457
|
+
Warren, Mike:
|
458
|
+
Active2 Design. Indymedia, 2002\\
|
459
|
+
http://docs.indymedia.org/view/Devel/DesignDocument
|
460
|
+
|
461
|
+
\end{thebibliography}
|
462
|
+
\end{document}
|