icss 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.rspec +3 -0
- data/.watchr +20 -0
- data/CHANGELOG.textile +8 -0
- data/Gemfile +17 -0
- data/Gemfile.lock +34 -0
- data/LICENSE.textile +20 -0
- data/README.textile +19 -0
- data/Rakefile +43 -0
- data/VERSION +1 -0
- data/examples/BulkData.avpr +21 -0
- data/examples/complicated.icss.yaml +158 -0
- data/examples/interop.avsc +32 -0
- data/examples/mail.avpr +20 -0
- data/examples/namespace.avpr +28 -0
- data/examples/org/apache/avro/ipc/HandshakeRequest.avsc +11 -0
- data/examples/org/apache/avro/ipc/HandshakeResponse.avsc +15 -0
- data/examples/org/apache/avro/ipc/trace/avroTrace.avdl +64 -0
- data/examples/org/apache/avro/ipc/trace/avroTrace.avpr +82 -0
- data/examples/org/apache/avro/mapred/tether/InputProtocol.avpr +59 -0
- data/examples/org/apache/avro/mapred/tether/OutputProtocol.avpr +75 -0
- data/examples/simple.avpr +70 -0
- data/examples/weather.avsc +9 -0
- data/icss.gemspec +104 -0
- data/icss_specification.textile +370 -0
- data/init.rb +3 -0
- data/lib/icss.rb +19 -0
- data/lib/icss/brevity.rb +136 -0
- data/lib/icss/code_asset.rb +16 -0
- data/lib/icss/core_ext.rb +4 -0
- data/lib/icss/data_asset.rb +22 -0
- data/lib/icss/message.rb +72 -0
- data/lib/icss/old.rb +96 -0
- data/lib/icss/protocol.rb +138 -0
- data/lib/icss/protocol_set.rb +48 -0
- data/lib/icss/sample_message_call.rb +140 -0
- data/lib/icss/target.rb +71 -0
- data/lib/icss/type.rb +517 -0
- data/lib/icss/type/factory.rb +196 -0
- data/lib/icss/validations.rb +16 -0
- data/lib/icss/view_helper.rb +28 -0
- data/spec/icss_spec.rb +7 -0
- data/spec/spec_helper.rb +31 -0
- metadata +218 -0
@@ -0,0 +1,370 @@
|
|
1
|
+
h1. The Infochimps Stupid Schema (ICSS) Specification
|
2
|
+
|
3
|
+
The icss is intended to be a *complete* and *expressive* description of a collection of related data and its associated assets. This includes a description of data assets (including their location and schema) and of api calls (messages) based on the described records (including their call signature and schema).
|
4
|
+
|
5
|
+
Besides this document, please refer to icss_style.textile, which gives the agreed style conventions for icss authoring.
|
6
|
+
|
7
|
+
Before any further proclamations here is a complete example:
|
8
|
+
|
9
|
+
<pre><code>
|
10
|
+
---
|
11
|
+
namespace: language.corpora.word_freq
|
12
|
+
protocol: bnc
|
13
|
+
under_consideration: true
|
14
|
+
update_frequency: monthly
|
15
|
+
|
16
|
+
messages:
|
17
|
+
word_stats:
|
18
|
+
request:
|
19
|
+
- name: word_stats_request
|
20
|
+
type: word_stats_request
|
21
|
+
response: word_statistics_record
|
22
|
+
doc: Query on a head word to get back word statistics (and word variants) from the British National Corpus.
|
23
|
+
samples:
|
24
|
+
- request:
|
25
|
+
- head_word: hello
|
26
|
+
response:
|
27
|
+
variant_word_dispersion: 0.78
|
28
|
+
variant_word_freq_ppm: 38.0
|
29
|
+
part_of_speech: Int
|
30
|
+
head_word: hello
|
31
|
+
variant_word_range: 91
|
32
|
+
variant_word: hello
|
33
|
+
head_word_dispersion: 0.78
|
34
|
+
head_word_freq_ppm: 38.0
|
35
|
+
head_word_range: 91
|
36
|
+
|
37
|
+
data_assets:
|
38
|
+
- name: word_stats_data_asset
|
39
|
+
location: data/word_stats
|
40
|
+
type: word_statistics_record
|
41
|
+
|
42
|
+
code_assets:
|
43
|
+
- location: code/bnc_endpoint.rb
|
44
|
+
name: bnc_endpoint
|
45
|
+
|
46
|
+
types:
|
47
|
+
|
48
|
+
- name: word_stats_request
|
49
|
+
type: record
|
50
|
+
doc: Query api parameters for the word_stats message
|
51
|
+
fields:
|
52
|
+
- name: head_word
|
53
|
+
type: string
|
54
|
+
|
55
|
+
- name: word_statistics_record
|
56
|
+
doc: |-
|
57
|
+
Here we provide plain text versions of the frequency lists contained in WFWSE. These are
|
58
|
+
raw unedited frequency lists produced by our software and do not contain the many
|
59
|
+
additional notes supplied in the book itself. The lists are tab delimited plain text so
|
60
|
+
can be imported into your prefered spreadsheet format. For the main lists we provide a
|
61
|
+
key to the columns. More details on the process undertaken in the preparation of the
|
62
|
+
lists can be found in the introduction to the book. These lists show dispersion ranging
|
63
|
+
between 0 and 1 rather than 0 and 100 as in the book. We multiplied the value by 100 and
|
64
|
+
rounded to zero decimal places in the book for reasons of space. Log likelihood values
|
65
|
+
are shown here to one decimal place rather than zero as in the book. Please note, all
|
66
|
+
frequencies are per million words.
|
67
|
+
type: record
|
68
|
+
fields:
|
69
|
+
- name: head_word
|
70
|
+
doc: Word type headword - see pp.4-5
|
71
|
+
type: string
|
72
|
+
- name: part_of_speech
|
73
|
+
doc: Part of speech (grammatical word class - see pp. 12-13)
|
74
|
+
type: string
|
75
|
+
- name: head_word_freq_ppm
|
76
|
+
doc: Rounded frequency per million word tokens (down to a minimum of 10 occurrences of a lemma per million)- see pp. 5. Where BOTH head word and lemmas appear
|
77
|
+
type: float
|
78
|
+
- name: head_word_range
|
79
|
+
doc: Range, the number of sectors of the corpus (out of a maximum of 100) in which the word occurs. Where BOTH head word and lemmas appear
|
80
|
+
type: int
|
81
|
+
- name: head_word_dispersion
|
82
|
+
doc: Dispersion value (Juilland's D) from a minimum of 0.00 to a maximum of 1.00. Where BOTH head word and lemmas appear
|
83
|
+
type: float
|
84
|
+
- name: variant_word
|
85
|
+
doc: Variant form of headword
|
86
|
+
type: string
|
87
|
+
- name: variant_word_freq_ppm
|
88
|
+
doc: Rounded frequency per million word tokens (down to a minimum of 10 occurrences of a lemma per million)- see pp. 5. Where BOTH head word and lemmas appear
|
89
|
+
type: float
|
90
|
+
- name: variant_word_range
|
91
|
+
doc: Range, the number of sectors of the corpus (out of a maximum of 100) in which the word occurs. Where BOTH head word and lemmas appear
|
92
|
+
type: int
|
93
|
+
- name: variant_word_dispersion
|
94
|
+
doc: Dispersion value (Juilland's D) from a minimum of 0.00 to a maximum of 1.00. Where BOTH head word and lemmas appear
|
95
|
+
type: float
|
96
|
+
|
97
|
+
targets:
|
98
|
+
mysql:
|
99
|
+
- table_name: bnc
|
100
|
+
database: language_corpora_word_freq
|
101
|
+
name: word_freq_bnc
|
102
|
+
data_assets:
|
103
|
+
- word_stats_data_asset
|
104
|
+
apeyeye:
|
105
|
+
- code_assets:
|
106
|
+
- bnc_endpoint.rb
|
107
|
+
catalog:
|
108
|
+
- name: word_freq_bnc
|
109
|
+
title: Word Frequencies From the British National Corpus
|
110
|
+
description: |-
|
111
|
+
Here we provide plain text versions of the frequency lists contained in WFWSE. These are
|
112
|
+
raw unedited frequency lists produced by our software and do not contain the many
|
113
|
+
additional notes supplied in the book itself. The lists are tab delimited plain text so
|
114
|
+
can be imported into your prefered spreadsheet format. For the main lists we provide a
|
115
|
+
key to the columns. More details on the process undertaken in the preparation of the
|
116
|
+
lists can be found in the introduction to the book. These lists show dispersion ranging
|
117
|
+
between 0 and 1 rather than 0 and 100 as in the book. We multiplied the value by 100 and
|
118
|
+
rounded to zero decimal places in the book for reasons of space. Log likelihood values
|
119
|
+
are shown here to one decimal place rather than zero as in the book. Please note, all
|
120
|
+
frequencies are per million words.
|
121
|
+
tags:
|
122
|
+
- token
|
123
|
+
- word
|
124
|
+
- corpus
|
125
|
+
- word-frequency
|
126
|
+
- british
|
127
|
+
- words
|
128
|
+
- language
|
129
|
+
messages:
|
130
|
+
- word_stats
|
131
|
+
packages:
|
132
|
+
- data_assets:
|
133
|
+
- word_stats_data_asset
|
134
|
+
|
135
|
+
</code></pre>
|
136
|
+
|
137
|
+
h2. namespace
|
138
|
+
|
139
|
+
The namespace for the entire icss. It should be interpreted as the 'category' and 'subcategory' and so on. While the nesting is allowed to be arbitrarily deep, nesting deeper than 2 is strongly discouraged unless you have a very good reason (you probably don't). Each additional category must be appended with a dot ('.').
|
140
|
+
|
141
|
+
h2. protocol
|
142
|
+
|
143
|
+
The short name of the collection. Together the protocol and namespace should be globally unique and fully qualify the data.
|
144
|
+
|
145
|
+
h2. under_consideration
|
146
|
+
|
147
|
+
This flag is set to true while you are working on procuring a dataset. The icss can still be published, and the webpage will reflect the current status of the icss based upon this flag.
|
148
|
+
|
149
|
+
h2. update_frequency
|
150
|
+
|
151
|
+
The frequency with which the dataset needs to be updated. Acceptable strings are _daily_, _weekly_, _monthly_, _quarterly_, and _never_.
|
152
|
+
h2. data_assets
|
153
|
+
|
154
|
+
Describes each data asset (a homogeneous chunk of data) and its location relative to the icss itself. This section is written in the icss as an array of hashes where the fields of each hash are as follows:
|
155
|
+
|
156
|
+
* @name@ - The name of the data asset.
|
157
|
+
* @location@ - The _relative_ uri of the described data asset. This is always assumed to be a directory and contains one or more actual files. All of the files must be homogeneous amongst themselves in that they are identically formatted (and have the same fields in the same order when that makes sense).
|
158
|
+
* @type@ - The fully qualified @type@ of the described data asset. The @type@ must be defined as a @record@ in the @types@ section of the icss.
|
159
|
+
|
160
|
+
h2. code_assets
|
161
|
+
|
162
|
+
Describes each code asset (an auxiliary piece of code) and its location relative to the icss itself. A typical code asset is the full definition of classes and functions required to implement a message in the @messages@ section of the icss. The code_assets section is written in the icss as an array of hashes where the fields of each hash are as follows:
|
163
|
+
|
164
|
+
* @name@ - The name of the code asset.
|
165
|
+
* @location@ - The location, _relative_ to the icss, of the code asset.
|
166
|
+
|
167
|
+
h2. types
|
168
|
+
|
169
|
+
Defines named record types for each data asset, message request, and message response. It is an array of valid avro @record@ schemas, and adheres to the 1.4.1 avro specification. Each entry in the array is called a @named record type@. Its fields may be composed of primitive types (eg. string, int, float) as well as other named types so long as they are defined _previously_ in the @types@ array. Any referenced types must be defined in the types array before any definitions that refer to them. A @type@ definition can have the following fields:
|
170
|
+
|
171
|
+
* @name@ - The name of the defined type.
|
172
|
+
* @type@ - The @type@ of the defined type (think of it as the superclass). Typically @record@.
|
173
|
+
* @doc@ - Top level documentation of this type describing what it is and some justification for its existence.
|
174
|
+
* @fields@ - An array of @field@ hashes. See below.
|
175
|
+
|
176
|
+
h3. fields
|
177
|
+
|
178
|
+
The @fields@ section of a @record@ definition is an array of hashes with the following fields:
|
179
|
+
|
180
|
+
* @name@ - the name of the field (required)
|
181
|
+
* @type@ - the @type@ of the field: either a primitive type, a named type, or an avro schema (as defined above under _types_). A complex type can be defined in line as a full type definition or previously in the @types@ array. See primitive types. Note: you *must* define a named type before referring to it. Recursive type definitions are currently unsupported. To decide whether to define a named type inline, consider how the documentation will ultimately read.
|
182
|
+
* @doc@ - a string describing this field for users (optional).
|
183
|
+
* @default@ - a default value for this field, used when reading instances that lack this field. Note: do _not_ use the @default@ attribute to show an "example" parameter, only to show the value used if none is supplied.
|
184
|
+
* @order@ - specifies how this field impacts sort ordering of this record (optional). Valid values are "ascending" (the default), "descending", or "ignore". For more details on how this is used, see the sort order section in the avro spec.
|
185
|
+
|
186
|
+
h4. Extended attributes
|
187
|
+
|
188
|
+
* @index@ - a string naming the index group, for databases that choose to take advantage of it (optional).
|
189
|
+
* @unique@ - index is unique (optional)
|
190
|
+
* @length@ - a length constraint, for downstream consumers that choose to take advantage of it (optional).
|
191
|
+
|
192
|
+
h3. Primitive Types
|
193
|
+
|
194
|
+
The set of primitive type names is:
|
195
|
+
|
196
|
+
* @null@ - no value
|
197
|
+
* @boolean@ - a binary value
|
198
|
+
* @int@ - 32-bit signed integer
|
199
|
+
* @long@ - 64-bit signed integer
|
200
|
+
* @float@ - single precision (32-bit) IEEE 754 floating-point number
|
201
|
+
* @double@ - double precision (64-bit) IEEE 754 floating-point number
|
202
|
+
* @bytes@ - sequence of 8-bit unsigned bytes
|
203
|
+
* @string@ - unicode character sequence
|
204
|
+
|
205
|
+
h2. messages
|
206
|
+
|
207
|
+
This section defines the messages (remote procedure calls) against the collection. It is a hash where each entry has the following fields:
|
208
|
+
|
209
|
+
* @request@ - A list of arguments to the message (required). A request has two fields, a @name@ and a @type@ where the @type@ is defined in the @types@ section. All infochimps API calls take a single argument of either a named @record@ type or a @map@ (hash). Although in principle the argument list is processed equivalently to the fields in a @record@ type schema, we demand that the list have exactly one element and that it refer to a named type defined at top level in the @types@ section.
|
210
|
+
* @response@ - The named @type@ for the response (required), defined at top level in the protocol's @types@ section.
|
211
|
+
* @errors@ - An optional array of named types that this message throws as errors (optional).
|
212
|
+
* @doc@ - Documentation specific to this message (optional).
|
213
|
+
* @samples@ - Sample call request and response/error contents (optional; see below).
|
214
|
+
|
215
|
+
h3. samples
|
216
|
+
|
217
|
+
A message may optionally supply @samples@, an array of SampleMessageCalls. Each SampleMessageCall is a hash with the following fields:
|
218
|
+
|
219
|
+
* @request@ - an array of one element giving a hash suitable for populating the message's @request@ type.
|
220
|
+
* @url@ - Rather than supply the request params, you may supply them as a URL. While the URL may be used to imply request parameters, it is permitted to be inconsistent.
|
221
|
+
* @response@ - a hash suitable for populating the message's @response@ type (optional).
|
222
|
+
* @error@ - in place of a response, an error expected for this call, from the message's error types.
|
223
|
+
|
224
|
+
h2. targets
|
225
|
+
|
226
|
+
This section defines publishing targets for the collection. It is an optional field, and each target type is optional as well. It is a hash with the following possible elements:
|
227
|
+
|
228
|
+
h3. Summary
|
229
|
+
|
230
|
+
* *@Mysql@* - Create tables and push data into MySQL
|
231
|
+
* *@Hbase@* - Create tables and push data into HBase
|
232
|
+
* *@ElasticSearch@* - Index data into Elasticsearch
|
233
|
+
* *@Catalog@* - Create a dataset page on the "Infochimps":http://www.infochimps.com site.
|
234
|
+
- Attaches API documentation and Packages
|
235
|
+
* *@Apeyeye@* - Publishes ICSS and Endpoint code assets to the Apeyeye
|
236
|
+
* *@GeoIndex@* - Index data into GeoIndex
|
237
|
+
|
238
|
+
h3. catalog
|
239
|
+
|
240
|
+
An array of CatalogTarget hashes. Each CatalogTarget hash describes a catalog entry on the Infochimps data catalog. It contains the following fields:
|
241
|
+
|
242
|
+
* @name@ - The name of the catalog entry. [String] (required)
|
243
|
+
* @title@ - The display title of the catalog entry. [String] (required)
|
244
|
+
* @description@ - A full description (can be textile) for the catalog entry that is displayed in the overview section on the catalog. [String] (required)
|
245
|
+
* @license@ - The name of an existing license on Infochimps.com [String] (optional)
|
246
|
+
* @link@ - The source URL of the data described by this catalog entry. [String] (optional)
|
247
|
+
* @owner@ - Who will own this catalog entry on Infochimps.com; the user must already exist. [String] (optional, defaults to user **Infochimps**)
|
248
|
+
* @price@ - The price in USD that this catalog's packages will be available for. Data is free if price is not provided [Float] (optional)
|
249
|
+
* @tags@ - An array of tags describing the catalog entry. Tags may only contain characters within [a-z0-9_] (lowercase alphanumeric or underscore). The publishing system MAY augment these tags with additional ones extracted from the protocol as a whole. (optional)
|
250
|
+
|
251
|
+
* @messages@ - An array of message names to attach to the catalog entry. Each message will be fully documented on the catalog. The last message in the array will be available to explore with the api explorer.
|
252
|
+
* @packages@ - An array of hashes. Each hash has a field called @data_assets@ which is an array of named data assets. Each data asset in the array will be bundled together into a single bulk download on the catalog.
|
253
|
+
|
254
|
+
h3. Apeyeye
|
255
|
+
|
256
|
+
* @code_assets@ - An array of named code_assets (required to exist in the top-level @code_assets@ section) to copy to an Apeyeye repository
|
257
|
+
|
258
|
+
h4. Note
|
259
|
+
|
260
|
+
* The @Apeyeye@ target also places a copy of the @ICSS@ next to the endpoint code assets.
|
261
|
+
|
262
|
+
* The @Apeyeye@ target creates necessary subdirectories and copies data_assets to the directory specified by the @apeyeye:repo_path@ in the @troop.yaml@ configuration file.
|
263
|
+
Troop will **not** currently perform an actual deployment to the cluster running the Infochimps API. After Troop has finished, the @endpoint@ and @icss.yaml@ files will need to be manually committed to the repo and the Chef deployment performed.
|
264
|
+
|
265
|
+
h3. mysql
|
266
|
+
|
267
|
+
An array of MysqlTarget hashes. Each MysqlTarget hash describes how to map one or more data_assets into a MySQL database. It contains the following fields:
|
268
|
+
|
269
|
+
* @database@ - The mysql database to write data into (required).
|
270
|
+
* @table_name@ - The table name to write data into (required).
|
271
|
+
* @data_assets@ - An array of named data assets (required to exist in the @data_assets@ section) to write to @database@.@table_name@. (required).
|
272
|
+
|
273
|
+
h3. hbase
|
274
|
+
|
275
|
+
An array of HbaseTarget hashes. There are two possibilities here:
|
276
|
+
|
277
|
+
h4. Fourple
|
278
|
+
|
279
|
+
Here your data itself will contain the hbase column family and column name that each line of data will be stored into. In this case the HbaseTarget hash will contain the following fields:
|
280
|
+
|
281
|
+
* @table_name@ - The hbase table to write data into (required)
|
282
|
+
* @column_families@ - An array of hbase column families that data will be written into (required). These column families will be created if they do not already exist.
|
283
|
+
* @loader@ - 'fourple_loader', this tells Troop to load _this_ target with the FourpleLoader class (required)
|
284
|
+
* @data_assets@ - An array of named data assets (required to exist in the @data_assets@ section). (required)
|
285
|
+
|
286
|
+
Your data *must* have the following schema to use this loader:
|
287
|
+
|
288
|
+
(@row_key@, @column_family@, @column_name@, @column_value@)
|
289
|
+
|
290
|
+
and optionally,
|
291
|
+
|
292
|
+
(@row_key@, @column_family@, @column_name@, @column_value@, @timestamp@)
|
293
|
+
|
294
|
+
where @timestamp@ is unix time.
|
295
|
+
|
296
|
+
h4. Tsv
|
297
|
+
|
298
|
+
Here your data will simply be tsv records. In this case the HbaseTarget hash will contain the following fields:
|
299
|
+
|
300
|
+
* @table_name@ - The hbase table to write data into (required)
|
301
|
+
* @column_family@ - A single hbase column family that data will be written into (required). This column family will be created if it does not already exist.
|
302
|
+
* @id_field@ - The name of a field to use as the row key during indexing (required).
|
303
|
+
* @loader@ - 'tsv_loader', this tells Troop to load _this_ target with the TsvLoader class (required)
|
304
|
+
* @data_assets@ - An array of named data assets (required to exist in the @data_assets@ section). (required)
|
305
|
+
|
306
|
+
h3. geo_index
|
307
|
+
|
308
|
+
This is for storing data into the infochimps geo index. An array of GeoIndexTarget hashes. Each hash has the following fields:
|
309
|
+
|
310
|
+
* @table_name@ - The hbase table to write data into. It *must* be one of @geo_location_infochimps_place@, @geo_location_infochimps_event@, or @geo_location_infochimps_path@ (required)
|
311
|
+
* @data_assets@ - An array of named data assets (required to exist in the @data_assets@ section). (required)
|
312
|
+
* @min_zoom@ - An integer specifying the minimum zoom level at which to index the data. Defaults to 3 (optional)
|
313
|
+
* @max_zoom@ - An integer specifying the maximum zoom level at which to index the data. Defaults to 6 (optional)
|
314
|
+
* @chars_per_page@ - An integer number of approximate characters per page. One or more pages, combined into a geoJSON "FeatureCollection" are returned from the infochimps geo api. This parameter affects how large a single page is. (required)
|
315
|
+
* @sort_field@ - A field within the @properties@ portion of each geoJSON feature indexed. This @sort_field@ will be used to sort the pages returned when a request is made against the infochimps geo api. Use '-1' if there is no sort field.
|
316
|
+
|
317
|
+
h3. elastic_search
|
318
|
+
|
319
|
+
An array of ElasticSearchTarget hashes. Each ElasticSearchTarget hash describes how to map one or more data_assets into an ElasticSearch data store. It contains the following fields:
|
320
|
+
|
321
|
+
* @index_name@ - The name of the index to write data into (required). It need not already exist.
|
322
|
+
* @object_type@ - The object type to create (required). Many different types of objects can exist in the same index. Each @object_type@ has its own schema that will be updated dynamically by ElasticSearch as records of that type are indexed. If this dynamism is unwanted (in the case you have more complex fields like @date@ or @geo_point@) then you should use the rest API and PUT the appropriate schema mapping ahead of time.
|
323
|
+
* @id_field@ - The name of a field within @object_type@ to use as an inherent id during indexing (optional). If this field is omitted the records will be assigned an id dynamically during indexing.
|
324
|
+
* @data_assets@ - An array of named data assets (required to exist in the @data_assets@ section), to write to the index specified by @index_name@ with the schema mapping specified by @object_type@.
|
325
|
+
* @loader@ - The way in which to load the data. One of @tsv_loader@ or @json_loader@. Choose the appropriate one based on what type of data you have.
|
326
|
+
|
327
|
+
h2. Differences between ICSS and Avro
|
328
|
+
|
329
|
+
Note the following limitations on the avro spec:
|
330
|
+
|
331
|
+
* No recursive definitions. A named type may not include a schema that eventually refers to the named type itself.
|
332
|
+
* All top-level schemas must be @record@ types.
|
333
|
+
* Do not use @union@ or @enum@ schema types.
|
334
|
+
* Do not define a complex schema in the request or response portion of a message -- you MUST use a named type defined at top level in the @types@ section, or use type @map@.
|
335
|
+
* Every message request must be an array of exactly one element.
|
336
|
+
|
337
|
+
h2. Proposed Alterations and Extensions to the spec
|
338
|
+
|
339
|
+
h3. Versioning
|
340
|
+
|
341
|
+
We need to take advantage of the Avro versioning abilities
|
342
|
+
|
343
|
+
h3. Many Catalog properties belong at top level
|
344
|
+
|
345
|
+
The catalog target entries should be changed:
|
346
|
+
|
347
|
+
* @name@ is redundant, should be the fullname of the protocol with '.' converted to '-' and used as a durable handle for the catalog entry.
|
348
|
+
* @title@ should become a top-level attribute of the protocol.
|
349
|
+
* @tags@ should become a top-level attribute of the protocol.
|
350
|
+
* @description@ should be called @doc@. The catalog description should be the catalog target's @doc@ prepended to the protocol's @doc@.
|
351
|
+
* @messages@ should go away - all messages should be published by default.
|
352
|
+
|
353
|
+
These attributes should be added at the top level:
|
354
|
+
|
355
|
+
* @created_at@ - fixes the notional created_at date of the dataset.
|
356
|
+
* @created_by@ - handle for the infochimps user to credit
|
357
|
+
* @collection@ - handle for a collection to tie the dataset.
|
358
|
+
* @link@ - URL to consider the primary reference link
|
359
|
+
* @sources@ - array of handles for the dataset's sources.
|
360
|
+
* @license@ - handle for the dataset's license
|
361
|
+
|
362
|
+
h3. Documentation Extensions
|
363
|
+
|
364
|
+
* @doc_order@ - On @types@, gives the order to display their documentation. Since we have to
|
365
|
+
* @doc_hide@ - Hide this type when displaying documentation.
|
366
|
+
* @doc_long@ - For cases where the doc string extends to many pages, lets us separate the doc into an abstract (in @doc@) and extended description (in @doc_long@).
|
367
|
+
|
368
|
+
h3. Extended Primitive Types
|
369
|
+
|
370
|
+
I'd (flip) like a way for a client to, on a best-effort basis, accept @time@, @date@, and @symbol@. This could be done by allowing those as primitive types, or by saying they are { "type":"string", "extended_type":"iso_time" }. Needs more thought.
|
data/init.rb
ADDED
data/lib/icss.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'icss/core_ext' unless Object.respond_to?(:class_attribute)
|
2
|
+
|
3
|
+
require 'gorillib/receiver'
|
4
|
+
require 'gorillib/receiver/acts_as_hash'
|
5
|
+
require 'gorillib/receiver/acts_as_loadable'
|
6
|
+
require 'gorillib/receiver/validations'
|
7
|
+
require 'time' # ain't that always the way
|
8
|
+
|
9
|
+
$: << File.dirname(__FILE__)
|
10
|
+
require 'icss/validations'
|
11
|
+
require 'icss/type'
|
12
|
+
require 'icss/message'
|
13
|
+
require 'icss/sample_message_call'
|
14
|
+
require 'icss/data_asset'
|
15
|
+
require 'icss/code_asset'
|
16
|
+
require 'icss/target'
|
17
|
+
require 'icss/protocol'
|
18
|
+
|
19
|
+
require 'icss/type/factory'
|
data/lib/icss/brevity.rb
ADDED
@@ -0,0 +1,136 @@
|
|
1
|
+
#
|
2
|
+
# Doing
|
3
|
+
#
|
4
|
+
# require 'icss/brevity'
|
5
|
+
#
|
6
|
+
# makes the #inspect method on Icss::Type's be nice and readable,
|
7
|
+
# not GIGANTE PIQUANTE OY CABRON
|
8
|
+
#
|
9
|
+
#
|
10
|
+
module Icss
|
11
|
+
|
12
|
+
# Reopens Protocol to replace the default #inspect with a short,
# single-line summary.
Protocol.class_eval do
  # Returns "#<ClassName key=value key=value ... >" built from the
  # pairs in #inspect_hsh, separated by single spaces.
  def inspect
    head  = "#<#{self.class.name}"
    pairs = inspect_hsh.map { |key, val| "#{key}=#{val}" }
    [head, pairs, ">"].join(" ")
  end

  # Summary attributes shown by #inspect: the protocol name and namespace,
  # the names of its types, messages and data assets (each rendered with
  # Array#inspect), and the first 31 characters of the doc string with runs
  # of newlines/tabs/carriage returns collapsed to a single space.
  def inspect_hsh
    summary = { :name => name, :namespace => @namespace }
    summary[:types]       = (types || []).map { |type| type.name }.inspect
    summary[:messages]    = (messages || {}).values.map { |msg| msg.name }.inspect
    summary[:data_assets] = (data_assets || []).map { |asset| asset.name }.inspect
    snippet = (doc || "")[0..30].gsub(/[\n\t\r]+/, ' ')
    summary[:doc] = "'#{snippet}...'"
    summary
  end
end
|
31
|
+
|
32
|
+
# Reopens Message to replace the default #inspect with a short,
# single-line summary.
Message.class_eval do
  # Returns "#<ClassName key=value key=value ... >" built from the
  # pairs in #inspect_hsh, separated by single spaces.
  def inspect
    head  = "#<#{self.class.name}"
    pairs = inspect_hsh.map { |key, val| "#{key}=#{val}" }
    [head, pairs, ">"].join(" ")
  end

  private

  # Compact cartoon of the message's fields, consumed by #inspect.
  # The :protocol entry is always present; it is nil when the message
  # has no protocol attached.
  def inspect_hsh
    summary = { :name => name }
    summary[:request]  = summary_of_request_attr
    summary[:response] = summary_of_response_attr
    summary[:errors]   = errors.inspect
    summary[:protocol] = (protocol.protocol if protocol.present?)
    # first 31 characters of the doc, whitespace runs flattened
    snippet = (doc || "")[0..30].gsub(/[\n\t\r]+/, ' ')
    summary[:doc] = "'#{snippet}...'"
    summary
  end
end
|
53
|
+
|
54
|
+
# Reopens Type to replace the default #inspect with a short,
# single-line summary. Other blocks in this file extend #inspect_hsh
# through `super`.
Type.class_eval do
  # Returns "#<ClassName [@type] key=value ... >". The @type element is
  # dropped when it is nil.
  def inspect
    head  = "#<#{self.class.name}"
    kind  = @type
    pairs = inspect_hsh.map { |key, val| "#{key}=#{val}" }
    [head, kind, pairs, ">"].compact.join(" ")
  end

  private

  # Base summary: the type's name plus the first 31 characters of its doc,
  # with runs of newlines/tabs/carriage returns collapsed to one space.
  def inspect_hsh
    summary = { :name => name }
    snippet = (doc || "")[0..30].gsub(/[\n\t\r]+/, ' ')
    summary[:doc] = "'#{snippet}...'"
    summary
  end
end
|
68
|
+
|
69
|
+
# Reopens RecordField to replace the default #inspect with a short,
# single-line summary.
RecordField.class_eval do
  # Returns "#<ClassName key=value key=value ... >" built from the
  # pairs in #inspect_hsh, separated by single spaces.
  def inspect
    head  = "#<#{self.class.name}"
    pairs = inspect_hsh.map { |key, val| "#{key}=#{val}" }
    [head, pairs, ">"].compact.join(" ")
  end

  private

  # Summary of the field: name, expanded type, default, sort order and a
  # 31-character doc snippet. Entries whose value is nil are left out.
  def inspect_hsh
    summary = { :name => name }
    summary[:type]    = expand_type
    summary[:default] = default
    summary[:order]   = @order
    snippet = (doc || "")[0..30].gsub(/[\n\t\r]+/, ' ')
    summary[:doc] = "'#{snippet}...'"
    summary.reject { |_key, val| val.nil? }
  end
end
|
86
|
+
|
87
|
+
# Reopens PrimitiveType so that #inspect shows only the class and the
# primitive's name.
PrimitiveType.class_eval do
  # Returns "#<ClassName name>".
  def inspect
    class_label = self.class.name
    "#<#{class_label} #{name}>"
  end
end
|
92
|
+
|
93
|
+
# Reopens NamedType to add its namespace to the #inspect summary.
NamedType.class_eval do
  private

  # Extends the inherited summary hash with a :namespace entry.
  def inspect_hsh
    inherited = super
    inherited.merge(:namespace => @namespace)
  end
end
|
99
|
+
|
100
|
+
# Reopens RecordType to add a field-name => field-type map to the
# #inspect summary.
RecordType.class_eval do
  private

  # Extends the inherited summary hash with a :fields entry: a hash of
  # field name to type, rendered with Hash#inspect. A field that has a
  # type and reports is_reference? is shown by its type's name; any other
  # field shows the type object itself.
  def inspect_hsh
    inherited   = super
    field_types = {}
    (fields || []).each do |field|
      field_types[field.name] =
        if field.type.present? && field.is_reference?
          field.type.name
        else
          field.type
        end
    end
    inherited.merge(:fields => field_types.inspect)
  end
end
|
106
|
+
|
107
|
+
# Reopens EnumType to add its symbols to the #inspect summary.
EnumType.class_eval do
  private

  # Extends the inherited summary hash with a :symbols entry
  # (the result of symbols.inspect).
  def inspect_hsh
    inherited = super
    inherited.merge(:symbols => symbols.inspect)
  end
end
|
113
|
+
|
114
|
+
# Reopens FixedType to add its size to the #inspect summary.
FixedType.class_eval do
  private

  # Extends the inherited summary hash with a :size entry
  # (the result of size.inspect).
  def inspect_hsh
    inherited = super
    inherited.merge(:size => size.inspect)
  end
end
|
120
|
+
|
121
|
+
# Reopens ArrayType to add its item type to the #inspect summary.
ArrayType.class_eval do
  private

  # Extends the inherited summary hash with an :items entry
  # (the result of items.inspect).
  def inspect_hsh
    inherited = super
    inherited.merge(:items => items.inspect)
  end
end
|
127
|
+
|
128
|
+
# Reopens MapType to add its value type to the #inspect summary.
MapType.class_eval do
  private

  # Extends the inherited summary hash with a :values entry
  # (the result of values.inspect).
  def inspect_hsh
    inherited = super
    inherited.merge(:values => values.inspect)
  end
end
|
134
|
+
|
135
|
+
end
|
136
|
+
|