icss-activesupport-4 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. checksums.yaml +7 -0
  2. data/.document +5 -0
  3. data/.rspec +3 -0
  4. data/.watchr +52 -0
  5. data/CHANGELOG.md +38 -0
  6. data/Gemfile +22 -0
  7. data/LICENSE.textile +20 -0
  8. data/README.md +298 -0
  9. data/Rakefile +39 -0
  10. data/TODO.md +44 -0
  11. data/VERSION +1 -0
  12. data/examples/avro_examples/BulkData.avpr +21 -0
  13. data/examples/avro_examples/complicated.icss.yaml +159 -0
  14. data/examples/avro_examples/interop.avsc +32 -0
  15. data/examples/avro_examples/mail.avpr +20 -0
  16. data/examples/avro_examples/namespace.avpr +28 -0
  17. data/examples/avro_examples/org/apache/avro/ipc/HandshakeRequest.avsc +11 -0
  18. data/examples/avro_examples/org/apache/avro/ipc/HandshakeResponse.avsc +15 -0
  19. data/examples/avro_examples/org/apache/avro/ipc/trace/avroTrace.avdl +64 -0
  20. data/examples/avro_examples/org/apache/avro/ipc/trace/avroTrace.avpr +82 -0
  21. data/examples/avro_examples/org/apache/avro/mapred/tether/InputProtocol.avpr +59 -0
  22. data/examples/avro_examples/org/apache/avro/mapred/tether/OutputProtocol.avpr +75 -0
  23. data/examples/avro_examples/simple.avpr +70 -0
  24. data/examples/avro_examples/weather.avsc +9 -0
  25. data/examples/bnc.icss.yaml +70 -0
  26. data/examples/chronic.icss.yaml +115 -0
  27. data/examples/license.icss.yaml +7 -0
  28. data/examples/source1.icss.yaml +4 -0
  29. data/examples/source2.icss.yaml +4 -0
  30. data/examples/test_icss.yaml +67 -0
  31. data/icss.gemspec +168 -0
  32. data/icss_specification.textile +393 -0
  33. data/lib/icss.rb +53 -0
  34. data/lib/icss/core_types.rb +20 -0
  35. data/lib/icss/error.rb +4 -0
  36. data/lib/icss/init.rb +3 -0
  37. data/lib/icss/message.rb +133 -0
  38. data/lib/icss/message/message_sample.rb +144 -0
  39. data/lib/icss/protocol.rb +199 -0
  40. data/lib/icss/protocol/code_asset.rb +18 -0
  41. data/lib/icss/protocol/data_asset.rb +23 -0
  42. data/lib/icss/protocol/license.rb +41 -0
  43. data/lib/icss/protocol/source.rb +37 -0
  44. data/lib/icss/protocol/target.rb +68 -0
  45. data/lib/icss/receiver_model.rb +24 -0
  46. data/lib/icss/receiver_model/active_model_shim.rb +36 -0
  47. data/lib/icss/receiver_model/acts_as_catalog.rb +174 -0
  48. data/lib/icss/receiver_model/acts_as_hash.rb +177 -0
  49. data/lib/icss/receiver_model/acts_as_loadable.rb +47 -0
  50. data/lib/icss/receiver_model/acts_as_tuple.rb +100 -0
  51. data/lib/icss/receiver_model/locale/en.yml +27 -0
  52. data/lib/icss/receiver_model/to_geo_json.rb +19 -0
  53. data/lib/icss/receiver_model/tree_merge.rb +34 -0
  54. data/lib/icss/receiver_model/validations.rb +31 -0
  55. data/lib/icss/serialization.rb +51 -0
  56. data/lib/icss/serialization/zaml.rb +442 -0
  57. data/lib/icss/type.rb +168 -0
  58. data/lib/icss/type/base_type.rb +0 -0
  59. data/lib/icss/type/named_type.rb +185 -0
  60. data/lib/icss/type/record_field.rb +77 -0
  61. data/lib/icss/type/record_model.rb +49 -0
  62. data/lib/icss/type/record_schema.rb +54 -0
  63. data/lib/icss/type/record_type.rb +325 -0
  64. data/lib/icss/type/simple_types.rb +71 -0
  65. data/lib/icss/type/structured_schema.rb +288 -0
  66. data/lib/icss/type/type_factory.rb +144 -0
  67. data/lib/icss/type/union_schema.rb +41 -0
  68. data/lib/icss/view_helper.rb +65 -0
  69. data/notes/named_array.md +32 -0
  70. data/notes/on_include_vs_extend_etc.rb +176 -0
  71. data/notes/technical_details.md +278 -0
  72. data/spec/core_types_spec.rb +119 -0
  73. data/spec/fixtures/zaml_complex_hash.yaml +35 -0
  74. data/spec/icss_spec.rb +90 -0
  75. data/spec/message/message_sample_spec.rb +4 -0
  76. data/spec/message_spec.rb +139 -0
  77. data/spec/protocol/license_spec.rb +67 -0
  78. data/spec/protocol/protocol_catalog_spec.rb +48 -0
  79. data/spec/protocol/protocol_validations_spec.rb +176 -0
  80. data/spec/protocol/source_spec.rb +65 -0
  81. data/spec/protocol_spec.rb +170 -0
  82. data/spec/receiver_model_spec.rb +115 -0
  83. data/spec/serialization/zaml_spec.rb +82 -0
  84. data/spec/serialization/zaml_test.rb +473 -0
  85. data/spec/serialization_spec.rb +63 -0
  86. data/spec/spec_helper.rb +39 -0
  87. data/spec/support/icss_test_helper.rb +67 -0
  88. data/spec/support/load_example_protocols.rb +17 -0
  89. data/spec/type/base_type_spec.rb +0 -0
  90. data/spec/type/named_type_spec.rb +75 -0
  91. data/spec/type/record_field_spec.rb +44 -0
  92. data/spec/type/record_model_spec.rb +206 -0
  93. data/spec/type/record_schema_spec.rb +161 -0
  94. data/spec/type/record_type_spec.rb +155 -0
  95. data/spec/type/simple_types_spec.rb +121 -0
  96. data/spec/type/structured_schema_spec.rb +300 -0
  97. data/spec/type/type_catalog_spec.rb +44 -0
  98. data/spec/type/type_factory_spec.rb +93 -0
  99. data/spec/type/union_schema_spec.rb +0 -0
  100. data/spec/type_spec.rb +63 -0
  101. metadata +304 -0
data/Rakefile ADDED
@@ -0,0 +1,39 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'rake'
11
+
12
+ require 'jeweler'
13
+ Jeweler::Tasks.new do |gem|
14
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
15
+ gem.name = "icss"
16
+ gem.homepage = "http://github.com/mrflip/icss"
17
+ gem.license = "MIT"
18
+ gem.summary = %Q{Infochimps Simple Schema library: an avro-compatible data description standard. ICSS completely describes a collection of data (and associated assets) in a way that is expressive, scalable and sufficient to drive remarkably complex downstream processes.}
19
+ gem.description = %Q{Infochimps Simple Schema library: an avro-compatible data description standard. ICSS completely describes a collection of data (and associated assets) in a way that is expressive, scalable and sufficient to drive remarkably complex downstream processes.}
20
+ gem.email = "coders@infochimps.com"
21
+ gem.authors = ["Philip (flip) Kromer for Infochimps"]
22
+ end
23
+ Jeweler::RubygemsDotOrgTasks.new
24
+
25
+ require 'rspec/core'
26
+ require 'rspec/core/rake_task'
27
+ RSpec::Core::RakeTask.new(:spec) do |spec|
28
+ spec.pattern = FileList['spec/**/*_spec.rb']
29
+ end
30
+
31
+ RSpec::Core::RakeTask.new(:rcov) do |spec|
32
+ spec.pattern = 'spec/**/*_spec.rb'
33
+ spec.rcov = true
34
+ end
35
+
36
+ task :default => :spec
37
+
38
+ require 'yard'
39
+ YARD::Rake::YardocTask.new
data/TODO.md ADDED
@@ -0,0 +1,44 @@
1
+ # Proposed ICSS refactoring:
2
+
3
+ * one ICSS <=> one Dataset <=> one Catalog Entry
4
+
5
+
6
+ geo.lake_body_of_water
7
+ geo.ocean_body_of_water
8
+ geo.river_body_of_water
9
+ geo.sea_body_of_water
10
+
11
+ ### catalog_entry
12
+
13
+ Most of it moves to top-level:
14
+ * tags: move to top-level attribute
15
+ * title: disappears -- take from `protocol`
16
+ * description: disappears -- take from `doc`
17
+ * owner: stays in catalog_entry
18
+ * price: attach to bulk data target
19
+
20
+ * messages: only necessary to select among messages
21
+ * packages: only necessary to select among packages
22
+
23
+ move to a new top-level section, `provenance`:
24
+
25
+ * license
26
+ * link -> becomes sources
27
+
28
+ moves *to* catalog_entry:
29
+
30
+ * `under_consideration`
31
+ * `update_frequency` (?? def. doesn't feel like a top-level thing, but this feels weird too)
32
+
33
+
34
+ ___________________________________________________________________________
35
+
36
+ Make the get_data part of culture/books/comics/grand_comics_db a three-liner
37
+
38
+ menu string Either the actual menu or a URL of the menu.
39
+ accepts_reservations string "Either Yes/No, or a URL at which reservations can
40
+ be made."
41
+
42
+
43
+ tile_x string | bounding_box
44
+ tile_y string | bounding_box
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.4.0
@@ -0,0 +1,21 @@
1
+
2
+ {"namespace": "org.apache.avro.test",
3
+ "protocol": "BulkData",
4
+
5
+ "types": [],
6
+
7
+ "messages": {
8
+
9
+ "read": {
10
+ "request": [],
11
+ "response": "bytes"
12
+ },
13
+
14
+ "write": {
15
+ "request": [ {"name": "data", "type": "bytes"} ],
16
+ "response": "null"
17
+ }
18
+
19
+ }
20
+
21
+ }
@@ -0,0 +1,159 @@
1
+ ---
2
+ namespace: util.time
3
+ protocol: chronic
4
+
5
+ doc: >-
6
+ An API call to parse human-readable date / time strings
7
+
8
+ data_assets: []
9
+
10
+ code_assets:
11
+ - location: code/chronic_endpoint.rb
12
+ type: apeyeye_endpoint
13
+
14
+ types:
15
+ - name: a_atsigns_b_params
16
+ type: record
17
+ doc: ""
18
+ fields:
19
+ - name: user_a_id
20
+ type: int
21
+ - name: user_a_sn
22
+ type: string
23
+ - name: user_b_id
24
+ type: int
25
+ - name: user_b_sn
26
+ type: string
27
+
28
+
29
+ - name: your_mom
30
+ type: record
31
+ doc: "hi"
32
+ fields:
33
+ - name: went_to_college
34
+ type: int
35
+
36
+ - name: chronic_parse_params
37
+ type: record
38
+ doc:
39
+ Query API parameters for the /util/time/chronic/parse call
40
+ fields:
41
+ - name: context
42
+ type: string
43
+ doc: >-
44
+ <tt>:past</tt> or <tt>:future</tt> (defaults to <tt>:future</tt>)
45
+
46
+ If your string represents a birthday, you can set
47
+ <tt>:context</tt> to <tt>:past</tt> and if an ambiguous string is
48
+ given, it will assume it is in the past. Specify <tt>:future</tt>
49
+ or omit to set a future context.
50
+
51
+ - name: now
52
+ type: string
53
+ doc: >-
54
+ Time (defaults to Time.now)
55
+
56
+ By setting <tt>:now</tt> to a Time, all computations will be based off
57
+ of that time instead of Time.now. If set to nil, Chronic will use Time.now.
58
+
59
+ - name: guess
60
+ type: boolean
61
+ doc: >-
62
+ +true+ or +false+ (defaults to +true+)
63
+
64
+ By default, the parser will guess a single point in time for the
65
+ given date or time. If you'd rather have the entire time span returned,
66
+ set <tt>:guess</tt> to +false+ and a Chronic::Span will be returned.
67
+
68
+ - name: ambiguous_time_range
69
+ type: int
70
+ doc: >-
71
+ Integer or <tt>:none</tt> (defaults to <tt>6</tt> (6am-6pm))
72
+
73
+ If an Integer is given, ambiguous times (like 5:00) will be
74
+ assumed to be within the range of that time in the AM to that time
75
+ in the PM. For example, if you set it to <tt>7</tt>, then the parser will
76
+ look for the time between 7am and 7pm. In the case of 5:00, it would
77
+ assume that means 5:00pm. If <tt>:none</tt> is given, no assumption
78
+ will be made, and the first matching instance of that time will
79
+ be used.
80
+ - name: field_that_is_recursive
81
+ type: your_mom
82
+ doc: wears combat boots
83
+
84
+ - name: field_that_is_an_array
85
+ type: array
86
+ items: int
87
+ doc: I am an array of primitives
88
+ - name: field_that_is_a_named_type_holy_fuck
89
+ type:
90
+ - type: array
91
+ items: your_mom
92
+ doc: I am an array of Iccs::Type::YourMomType
93
+
94
+ - name: chronic_parse_response
95
+ type: record
96
+ doc: |-
97
+ Query API response for the /util/time/chronic/parse call
98
+ fields:
99
+ - name: time
100
+ doc: >-
101
+ The UTC parsed time, as a "ISO 8601 combined date time":http://en.wikipedia.org/wiki/ISO_8601 string.
102
+ type: string
103
+ - name: epoch_seconds
104
+ doc: >-
105
+ The UTC parsed time, as "epoch seconds":http://en.wikipedia.org/wiki/Epoch_seconds integer.
106
+ type: int
107
+ - name: params
108
+ doc: >-
109
+ Your params, right back atcha'
110
+ type: string # map
111
+ - name: errors
112
+ doc: >-
113
+ Error conditions
114
+ type: array
115
+ optional: true
116
+ - name: whatever
117
+ doc: dummy string
118
+ type:
119
+ name: embedded_record_def
120
+ type: record
121
+ doc: I am an inline record definition
122
+ fields:
123
+ - name: name
124
+ type: string
125
+
126
+ messages:
127
+ parse:
128
+ request:
129
+ - name: params
130
+ type: chronic_parse_params
131
+ response: chronic_parse_response
132
+ doc_file:
133
+ README.md
134
+
135
+ #
136
+ # targets:
137
+ # mysql:
138
+ # # Name of the data asset, mysql table name will be derived from this
139
+ # - table_name: word_freq_bnc
140
+ # database: lang_corp_word_freq_bnc
141
+ # data_assets:
142
+ # - word_freq_bnc_data_asset
143
+ #
144
+ # apidocs:
145
+ # - dest_path: /language/corpora/word_freq_bnc/token_frequency
146
+ #
147
+ # bulkdownload:
148
+ # - package_name: word_freq_bnc
149
+ # data_assets:
150
+ # - word_freq_bnc_data_asset
151
+ #
152
+ # catalog:
153
+ # # Name of the catalogue entry
154
+ # - name: word_freq_bnc
155
+ # title: Word Frequencies From the British National Corpus
156
+ # messages:
157
+ # - token_frequency
158
+ # packages:
159
+ # - word_freq_bnc
@@ -0,0 +1,32 @@
1
+ { "name":"Interop",
2
+ "namespace": "org.apache.avro",
3
+ "type": "record",
4
+ "fields": [
5
+ {"name": "intField", "type": "int"},
6
+ {"name": "longField", "type": "long"},
7
+ {"name": "stringField", "type": "string"},
8
+ {"name": "boolField", "type": "boolean"},
9
+ {"name": "floatField", "type": "float"},
10
+ {"name": "doubleField", "type": "double"},
11
+ {"name": "bytesField", "type": "bytes"},
12
+ {"name": "nullField", "type": "null"},
13
+ {"name": "arrayField", "type": {"type": "array", "items": "double"}},
14
+ {"name": "mapField", "type":
15
+ { "type": "map", "values":
16
+ { "name": "Foo",
17
+ "type": "record",
18
+ "fields": [{"name": "label", "type": "string"}]}
19
+ }},
20
+ {"name": "unionField", "type":
21
+ ["boolean", "double", {"type": "array", "items": "bytes"}] },
22
+ {"name": "enumField", "type":
23
+ {"name": "Kind", "type": "enum", "symbols": ["A","B","C"]} },
24
+ {"name": "fixedField", "type":
25
+ {"name": "MD5", "type": "fixed", "size": 16} },
26
+ {"name": "recordField", "type":
27
+ {"name": "Node", "type": "record",
28
+ "fields": [
29
+ {"name": "label", "type": "string"},
30
+ {"name": "children", "type": {"type": "array", "items": "Node"}}]}}
31
+ ]
32
+ }
@@ -0,0 +1,20 @@
1
+ {"namespace": "org.apache.avro.test",
2
+ "protocol": "Mail",
3
+
4
+ "types": [
5
+ {"name": "Message", "type": "record",
6
+ "fields": [
7
+ {"name": "to", "type": "string"},
8
+ {"name": "from", "type": "string"},
9
+ {"name": "body", "type": "string"}
10
+ ]
11
+ }
12
+ ],
13
+
14
+ "messages": {
15
+ "send": {
16
+ "request": [{"name": "message", "type": "Message"}],
17
+ "response": "string"
18
+ }
19
+ }
20
+ }
@@ -0,0 +1,28 @@
1
+ {"namespace": "org.apache.avro.test.namespace",
2
+ "protocol": "TestNamespace",
3
+
4
+ "types": [
5
+ {"name": "org.apache.avro.test.util.MD5", "type": "fixed", "size": 16},
6
+ {"name": "TestRecord", "type": "record",
7
+ "fields": [ {"name": "hash", "type": "org.apache.avro.test.util.MD5"} ]
8
+ },
9
+ {"name": "TestError", "namespace": "org.apache.avro.test.errors",
10
+ "type": "error", "fields": [ {"name": "message", "type": "string"} ]
11
+ }
12
+ ],
13
+
14
+ "messages": {
15
+ "echo": {
16
+ "request": [{"name": "record", "type": "TestRecord"}],
17
+ "response": "TestRecord"
18
+ },
19
+
20
+ "error": {
21
+ "request": [],
22
+ "response": "null",
23
+ "errors": ["org.apache.avro.test.errors.TestError"]
24
+ }
25
+
26
+ }
27
+
28
+ }
@@ -0,0 +1,11 @@
1
+ {
2
+ "name": "HandshakeRequest", "namespace":"org.apache.avro.ipc",
3
+ "type": "record",
4
+ "fields": [
5
+ {"name": "clientHash",
6
+ "type": {"name": "MD5", "type": "fixed", "size": 16}},
7
+ {"name": "clientProtocol", "type": ["null", "string"]},
8
+ {"name": "serverHash", "type": "MD5"},
9
+ {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]}
10
+ ]
11
+ }
@@ -0,0 +1,15 @@
1
+ {
2
+ "name": "HandshakeResponse", "namespace": "org.apache.avro.ipc",
3
+ "type": "record",
4
+ "fields": [
5
+ {"name": "match",
6
+ "type": {"name": "HandshakeMatch", "type": "enum",
7
+ "symbols": ["BOTH", "CLIENT", "NONE"]}},
8
+ {"name": "serverProtocol",
9
+ "type": ["null", "string"]},
10
+ {"name": "serverHash",
11
+ "type": ["null", {"name": "MD5", "type": "fixed", "size": 16}]},
12
+ {"name": "meta",
13
+ "type": ["null", {"type": "map", "values": "bytes"}]}
14
+ ]
15
+ }
@@ -0,0 +1,64 @@
1
+ /**
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing, software
13
+ * distributed under the License is distributed on an "AS IS" BASIS,
14
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ * See the License for the specific language governing permissions and
16
+ * limitations under the License.
17
+ */
18
+
19
+ /**
20
+ * A Span is our basic unit of tracing. It tracks the critical points
21
+ * of a single RPC call and records other call meta-data. It also
22
+ * allows arbitrary string annotations. Both the client and server create
23
+ * Span objects, each of which is populated with half of the relevant event
24
+ * data. They share a span ID, which allows us to merge them into one complete
25
+ * span later on.
26
+ */
27
+ @namespace("org.apache.avro.ipc.trace")
28
+
29
+ protocol AvroTrace {
30
+ enum SpanEvent { SERVER_RECV, SERVER_SEND, CLIENT_RECV, CLIENT_SEND }
31
+
32
+ fixed ID(8);
33
+
34
+ record TimestampedEvent {
35
+ long timeStamp; // Unix time, in nanoseconds
36
+ union { SpanEvent, string} event;
37
+ }
38
+
39
+ record Span {
40
+ ID traceID; // ID shared by all Spans in a given trace
41
+ ID spanID; // Random ID for this Span
42
+ union { ID, null } parentSpanID; // Parent Span ID (null if root Span)
43
+ string messageName; // Function call represented
44
+ long requestPayloadSize; // Size (bytes) of the request
45
+ long responsePayloadSize; // Size (bytes) of the response
46
+ union { string, null} requestorHostname; // Hostname of requestor
47
+ // int requestorPort; // Port of the requestor (currently unused)
48
+ union { string, null } responderHostname; // Hostname of the responder
49
+ // int responderPort; // Port of the responder (currently unused)
50
+ array<TimestampedEvent> events; // List of critical events
51
+ boolean complete; // Whether includes data from both sides
52
+ }
53
+
54
+ /**
55
+ * Get all spans stored on this host.
56
+ */
57
+ array<Span> getAllSpans();
58
+
59
+ /**
60
+ * Get spans occurring between start and end. Each is a unix timestamp
61
+ * in nanosecond units (for consistency with TimestampedEvent).
62
+ */
63
+ array<Span> getSpansInRange(long start, long end);
64
+ }