ul-wukong 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. checksums.yaml +15 -0
  2. data/.gitignore +60 -0
  3. data/.gitmodules +6 -0
  4. data/.rspec +2 -0
  5. data/.travis.yml +19 -0
  6. data/.yardopts +6 -0
  7. data/CHANGELOG.md +7 -0
  8. data/Gemfile +17 -0
  9. data/Guardfile +12 -0
  10. data/LICENSE.md +95 -0
  11. data/NOTES-travis.md +31 -0
  12. data/README-old.md +422 -0
  13. data/README.md +1308 -0
  14. data/Rakefile +28 -0
  15. data/TODO.md +99 -0
  16. data/bin/cutc +30 -0
  17. data/bin/cuttab +5 -0
  18. data/bin/greptrue +6 -0
  19. data/bin/md5sort +20 -0
  20. data/bin/setcat +11 -0
  21. data/bin/tabchar +5 -0
  22. data/bin/uniq-ord +59 -0
  23. data/bin/uniqc +3 -0
  24. data/bin/wu +34 -0
  25. data/bin/wu-clean-encoding +31 -0
  26. data/bin/wu-date +13 -0
  27. data/bin/wu-datetime +13 -0
  28. data/bin/wu-hist +3 -0
  29. data/bin/wu-lign +186 -0
  30. data/bin/wu-local +4 -0
  31. data/bin/wu-plus +9 -0
  32. data/bin/wu-source +5 -0
  33. data/bin/wu-sum +31 -0
  34. data/diagrams/wu_local.dot +39 -0
  35. data/diagrams/wu_local.dot.png +0 -0
  36. data/examples/Gemfile +38 -0
  37. data/examples/README.md +9 -0
  38. data/examples/basic/string_reverser.rb +23 -0
  39. data/examples/basic/tiny_count.rb +8 -0
  40. data/examples/basic/word_count/accumulator.rb +26 -0
  41. data/examples/basic/word_count/tokenizer.rb +13 -0
  42. data/examples/basic/word_count/word_count.rb +6 -0
  43. data/examples/dataflow/scraper_macro_flow.rb +28 -0
  44. data/examples/deploy_pack/Gemfile +6 -0
  45. data/examples/deploy_pack/README.md +6 -0
  46. data/examples/deploy_pack/a/b/c/.gitkeep +0 -0
  47. data/examples/deploy_pack/app/processors/string_reverser.rb +5 -0
  48. data/examples/deploy_pack/config/environment.rb +1 -0
  49. data/examples/dsl/dataflow/fibonacci_series.rb +101 -0
  50. data/examples/dsl/dataflow/scraper_macro_flow.rb +28 -0
  51. data/examples/dsl/dataflow/simple.rb +12 -0
  52. data/examples/dsl/dataflow/telegram.rb +45 -0
  53. data/examples/dsl/workflow/cherry_pie.dot +97 -0
  54. data/examples/dsl/workflow/cherry_pie.md +104 -0
  55. data/examples/dsl/workflow/cherry_pie.png +0 -0
  56. data/examples/dsl/workflow/cherry_pie.rb +101 -0
  57. data/examples/empty/.gitkeep +0 -0
  58. data/examples/examples_helper.rb +9 -0
  59. data/examples/geo.rb +4 -0
  60. data/examples/geo/geo_grids.numbers +0 -0
  61. data/examples/geo/geolocated.rb +331 -0
  62. data/examples/geo/quadtile.rb +69 -0
  63. data/examples/geo/spec/geolocated_spec.rb +247 -0
  64. data/examples/geo/tile_fetcher.rb +77 -0
  65. data/examples/graph/implied_geolocation/README.md +63 -0
  66. data/examples/graph/minimum_spanning_tree/airfares_graphviz.rb +73 -0
  67. data/examples/improver/tweet_summary.rb +73 -0
  68. data/examples/loadable.rb +2 -0
  69. data/examples/munging/airline_flights/airline_flights.rake +83 -0
  70. data/examples/munging/airline_flights/airplane.rb +0 -0
  71. data/examples/munging/airline_flights/airport_id_unification.rb +129 -0
  72. data/examples/munging/airline_flights/airport_ok_chars.rb +4 -0
  73. data/examples/munging/airline_flights/indexable.rb +75 -0
  74. data/examples/munging/airline_flights/indexable_spec.rb +90 -0
  75. data/examples/munging/airline_flights/reconcile_airports.rb +142 -0
  76. data/examples/munging/airline_flights/tasks.rake +83 -0
  77. data/examples/munging/airline_flights/topcities.rb +167 -0
  78. data/examples/munging/geo/geo_json.rb +54 -0
  79. data/examples/munging/geo/geo_models.rb +69 -0
  80. data/examples/munging/geo/geonames_models.rb +107 -0
  81. data/examples/munging/geo/iso_codes.rb +172 -0
  82. data/examples/munging/geo/reconcile_countries.rb +124 -0
  83. data/examples/munging/geo/tasks.rake +71 -0
  84. data/examples/munging/wikipedia/articles/extract_articles-parsed.rb +79 -0
  85. data/examples/munging/wikipedia/articles/extract_articles-templated.rb +136 -0
  86. data/examples/munging/wikipedia/articles/textualize_articles.rb +54 -0
  87. data/examples/munging/wikipedia/articles/verify_structure.rb +43 -0
  88. data/examples/munging/wikipedia/articles/wp2txt-LICENSE.txt +22 -0
  89. data/examples/munging/wikipedia/articles/wp2txt_article.rb +259 -0
  90. data/examples/munging/wikipedia/articles/wp2txt_utils.rb +452 -0
  91. data/examples/munging/wikipedia/dbpedia/dbpedia_common.rb +5 -0
  92. data/examples/munging/wikipedia/dbpedia/dbpedia_extract_geocoordinates.rb +78 -0
  93. data/examples/munging/wikipedia/dbpedia/extract_links-cruft.rb +66 -0
  94. data/examples/munging/wikipedia/dbpedia/extract_links.rb +260 -0
  95. data/examples/munging/wikipedia/dbpedia/sameas_extractor.rb +20 -0
  96. data/examples/rake_helper.rb +97 -0
  97. data/examples/ruby_project/Gemfile +6 -0
  98. data/examples/ruby_project/README.md +6 -0
  99. data/examples/ruby_project/a/b/c/.gitkeep +0 -0
  100. data/examples/server_logs/geo_ip_mapping/munge_geolite.rb +82 -0
  101. data/examples/server_logs/logline.rb +95 -0
  102. data/examples/server_logs/models.rb +66 -0
  103. data/examples/server_logs/page_counts.pig +48 -0
  104. data/examples/server_logs/server_logs-01-parse-script.rb +13 -0
  105. data/examples/server_logs/server_logs-02-histograms-full.rb +33 -0
  106. data/examples/server_logs/server_logs-02-histograms-mapper.rb +14 -0
  107. data/examples/server_logs/server_logs-03-breadcrumbs-full.rb +71 -0
  108. data/examples/server_logs/server_logs-04-page_page_edges-full.rb +40 -0
  109. data/examples/serverlogs/geo_ip_mapping/munge_geolite.rb +82 -0
  110. data/examples/serverlogs/models/logline.rb +102 -0
  111. data/examples/serverlogs/parser/apache_parser_widget.rb +46 -0
  112. data/examples/serverlogs/visit_paths/common.rb +4 -0
  113. data/examples/serverlogs/visit_paths/page_counts.pig +48 -0
  114. data/examples/serverlogs/visit_paths/serverlogs-01-parse-script.rb +11 -0
  115. data/examples/serverlogs/visit_paths/serverlogs-02-histograms-full.rb +31 -0
  116. data/examples/serverlogs/visit_paths/serverlogs-02-histograms-mapper.rb +12 -0
  117. data/examples/serverlogs/visit_paths/serverlogs-03-breadcrumbs-full.rb +67 -0
  118. data/examples/serverlogs/visit_paths/serverlogs-04-page_page_edges-full.rb +38 -0
  119. data/examples/splitter.rb +94 -0
  120. data/examples/string_reverser.rb +7 -0
  121. data/examples/text/pig_latin/pig_latinizer.rb +35 -0
  122. data/examples/text/pig_latin/pig_latinizer_widget.rb +16 -0
  123. data/examples/text/regional_flavor/README.md +14 -0
  124. data/examples/text/regional_flavor/article_wordbags.pig +39 -0
  125. data/examples/text/regional_flavor/j01-article_wordbags.rb +4 -0
  126. data/examples/text/regional_flavor/simple_pig_script.pig +27 -0
  127. data/examples/twitter.rb +5 -0
  128. data/lib/hanuman.rb +36 -0
  129. data/lib/hanuman/graph.rb +97 -0
  130. data/lib/hanuman/graphvizzer.rb +206 -0
  131. data/lib/hanuman/graphvizzer/gv_models.rb +161 -0
  132. data/lib/hanuman/graphvizzer/gv_presenter.rb +97 -0
  133. data/lib/hanuman/link.rb +35 -0
  134. data/lib/hanuman/registry.rb +46 -0
  135. data/lib/hanuman/stage.rb +128 -0
  136. data/lib/hanuman/tree.rb +67 -0
  137. data/lib/wu/geo.rb +4 -0
  138. data/lib/wu/geo/geo_grids.numbers +0 -0
  139. data/lib/wu/geo/geolocated.rb +331 -0
  140. data/lib/wu/geo/quadtile.rb +69 -0
  141. data/lib/wu/graph/union_find.rb +62 -0
  142. data/lib/wu/model/reconcilable.rb +63 -0
  143. data/lib/wu/munging.rb +71 -0
  144. data/lib/wu/social/models/twitter.rb +31 -0
  145. data/lib/wu/wikipedia/models.rb +20 -0
  146. data/lib/wukong.rb +54 -0
  147. data/lib/wukong/dataflow.rb +43 -0
  148. data/lib/wukong/doc_helpers.rb +14 -0
  149. data/lib/wukong/doc_helpers/dataflow_handler.rb +29 -0
  150. data/lib/wukong/doc_helpers/field_handler.rb +91 -0
  151. data/lib/wukong/doc_helpers/processor_handler.rb +29 -0
  152. data/lib/wukong/driver.rb +214 -0
  153. data/lib/wukong/driver/event_machine_driver.rb +15 -0
  154. data/lib/wukong/driver/wiring.rb +68 -0
  155. data/lib/wukong/local.rb +42 -0
  156. data/lib/wukong/local/runner.rb +96 -0
  157. data/lib/wukong/local/stdio_driver.rb +104 -0
  158. data/lib/wukong/logger.rb +102 -0
  159. data/lib/wukong/model/faker.rb +136 -0
  160. data/lib/wukong/model/flatpack_parser/flat.rb +60 -0
  161. data/lib/wukong/model/flatpack_parser/flatpack.rb +4 -0
  162. data/lib/wukong/model/flatpack_parser/lang.rb +46 -0
  163. data/lib/wukong/model/flatpack_parser/parser.rb +55 -0
  164. data/lib/wukong/model/flatpack_parser/tokens.rb +130 -0
  165. data/lib/wukong/plugin.rb +48 -0
  166. data/lib/wukong/processor.rb +110 -0
  167. data/lib/wukong/rake_helper.rb +6 -0
  168. data/lib/wukong/runner.rb +169 -0
  169. data/lib/wukong/runner/boot_sequence.rb +123 -0
  170. data/lib/wukong/runner/code_loader.rb +52 -0
  171. data/lib/wukong/runner/command_runner.rb +44 -0
  172. data/lib/wukong/runner/deploy_pack_loader.rb +75 -0
  173. data/lib/wukong/runner/help_message.rb +42 -0
  174. data/lib/wukong/source.rb +33 -0
  175. data/lib/wukong/source/source_driver.rb +74 -0
  176. data/lib/wukong/source/source_runner.rb +38 -0
  177. data/lib/wukong/spec_helpers.rb +74 -0
  178. data/lib/wukong/spec_helpers/integration_tests.rb +150 -0
  179. data/lib/wukong/spec_helpers/integration_tests/integration_test_matchers.rb +207 -0
  180. data/lib/wukong/spec_helpers/integration_tests/integration_test_runner.rb +97 -0
  181. data/lib/wukong/spec_helpers/shared_examples.rb +22 -0
  182. data/lib/wukong/spec_helpers/unit_tests.rb +135 -0
  183. data/lib/wukong/spec_helpers/unit_tests/unit_test_driver.rb +132 -0
  184. data/lib/wukong/spec_helpers/unit_tests/unit_test_matchers.rb +169 -0
  185. data/lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb +60 -0
  186. data/lib/wukong/version.rb +3 -0
  187. data/lib/wukong/widget/echo.rb +55 -0
  188. data/lib/wukong/widget/extract.rb +122 -0
  189. data/lib/wukong/widget/filters.rb +452 -0
  190. data/lib/wukong/widget/logger.rb +56 -0
  191. data/lib/wukong/widget/operators.rb +82 -0
  192. data/lib/wukong/widget/reducers.rb +10 -0
  193. data/lib/wukong/widget/reducers/accumulator.rb +73 -0
  194. data/lib/wukong/widget/reducers/bin.rb +368 -0
  195. data/lib/wukong/widget/reducers/count.rb +73 -0
  196. data/lib/wukong/widget/reducers/group.rb +128 -0
  197. data/lib/wukong/widget/reducers/group_concat.rb +98 -0
  198. data/lib/wukong/widget/reducers/improver.rb +71 -0
  199. data/lib/wukong/widget/reducers/join_xml.rb +37 -0
  200. data/lib/wukong/widget/reducers/moments.rb +72 -0
  201. data/lib/wukong/widget/reducers/sort.rb +180 -0
  202. data/lib/wukong/widget/reducers/uniq.rb +91 -0
  203. data/lib/wukong/widget/serializers.rb +317 -0
  204. data/lib/wukong/widget/utils.rb +46 -0
  205. data/lib/wukong/widgets.rb +7 -0
  206. data/spec/examples/dataflow/fibonacci_series_spec.rb +18 -0
  207. data/spec/examples/dataflow/parse_apache_logs_spec.rb +8 -0
  208. data/spec/examples/dataflow/parsing_spec.rb +14 -0
  209. data/spec/examples/dataflow/simple_spec.rb +34 -0
  210. data/spec/examples/dataflow/telegram_spec.rb +43 -0
  211. data/spec/examples/graph/minimum_spanning_tree_spec.rb +34 -0
  212. data/spec/examples/munging/airline_flights/identifiers_spec.rb +16 -0
  213. data/spec/examples/munging/airline_flights_spec.rb +202 -0
  214. data/spec/examples/text/pig_latin_spec.rb +18 -0
  215. data/spec/examples/workflow/cherry_pie_spec.rb +36 -0
  216. data/spec/hanuman/graph_spec.rb +119 -0
  217. data/spec/hanuman/hanuman_spec.rb +10 -0
  218. data/spec/hanuman/registry_spec.rb +123 -0
  219. data/spec/hanuman/stage_spec.rb +81 -0
  220. data/spec/hanuman/tree_spec.rb +119 -0
  221. data/spec/spec.opts +1 -0
  222. data/spec/spec_helper.rb +43 -0
  223. data/spec/support/example_test_helpers.rb +95 -0
  224. data/spec/support/hanuman_test_helpers.rb +92 -0
  225. data/spec/support/integration_helper.rb +38 -0
  226. data/spec/support/model_test_helpers.rb +115 -0
  227. data/spec/support/shared_context_for_graphs.rb +57 -0
  228. data/spec/support/shared_context_for_reducers.rb +37 -0
  229. data/spec/support/shared_examples_for_builders.rb +94 -0
  230. data/spec/support/shared_examples_for_shortcuts.rb +57 -0
  231. data/spec/wu/model/reconcilable_spec.rb +152 -0
  232. data/spec/wukong/dataflow_spec.rb +87 -0
  233. data/spec/wukong/driver_spec.rb +154 -0
  234. data/spec/wukong/local/runner_spec.rb +29 -0
  235. data/spec/wukong/local/stdio_driver_spec.rb +73 -0
  236. data/spec/wukong/local_spec.rb +6 -0
  237. data/spec/wukong/logger_spec.rb +49 -0
  238. data/spec/wukong/model/faker_spec.rb +132 -0
  239. data/spec/wukong/processor_spec.rb +21 -0
  240. data/spec/wukong/runner_spec.rb +132 -0
  241. data/spec/wukong/source_spec.rb +6 -0
  242. data/spec/wukong/widget/extract_spec.rb +101 -0
  243. data/spec/wukong/widget/filters_spec.rb +79 -0
  244. data/spec/wukong/widget/logger_spec.rb +23 -0
  245. data/spec/wukong/widget/operators_spec.rb +25 -0
  246. data/spec/wukong/widget/reducers/bin_spec.rb +92 -0
  247. data/spec/wukong/widget/reducers/count_spec.rb +11 -0
  248. data/spec/wukong/widget/reducers/group_spec.rb +21 -0
  249. data/spec/wukong/widget/reducers/join_xml_spec.rb +25 -0
  250. data/spec/wukong/widget/reducers/moments_spec.rb +36 -0
  251. data/spec/wukong/widget/reducers/sort_spec.rb +26 -0
  252. data/spec/wukong/widget/reducers/uniq_spec.rb +14 -0
  253. data/spec/wukong/widget/serializers_spec.rb +114 -0
  254. data/spec/wukong/widget/sink_spec.rb +19 -0
  255. data/spec/wukong/widget/source_spec.rb +65 -0
  256. data/spec/wukong/wu-local_spec.rb +109 -0
  257. data/spec/wukong/wu-source_spec.rb +32 -0
  258. data/spec/wukong/wu_spec.rb +14 -0
  259. data/spec/wukong/wukong_spec.rb +10 -0
  260. data/wukong.gemspec +35 -0
  261. metadata +465 -0
@@ -0,0 +1,46 @@
module Wukong

  # Mixin that pulls a single value out of a record. The record may be
  # a delimited String, a String holding a JSON object, an indexable
  # collection, or any object exposing a public method named after the
  # requested field.
  #
  # The including class must offer a class-level `field` macro (called
  # from the `included` hook below) and its instances must respond to
  # `separator` -- presumably the reader that the `field` macro
  # defines; confirm against the including classes.
  #
  # The original authors' own note on this module: "This code is gross
  # and nasty."
  module DynamicGet

    # Inclusion hook: declares a String `separator` field, defaulting
    # to a tab, on the including class through its `field` macro.
    #
    # @param klass [Class] the class that is including this module
    def self.included(klass)
      klass.send(:field, :separator, String, :default => "\t", :doc => "The default separator between fields on a single line")
    end

    # Extract `field` from `obj`, choosing a strategy from the shape of
    # both arguments. The first rule that applies wins:
    #
    # 1. `field` is nil/false: `obj` is returned untouched.
    # 2. `field` reads as a positive integer N: the N-th (1-based)
    #    element is returned -- of `obj.split(separator)` when `obj` is
    #    a String, of `obj` itself otherwise.
    # 3. `field` reads as a negative integer: `obj` is returned.
    # 4. `obj` is a String in which some line starts with optional
    #    whitespace and `{`: it is parsed with MultiJson and `field` is
    #    resolved as a dotted path in the result; nil if parsing fails.
    # 5. `field` has no dot and `obj` responds to it: that public
    #    method is called with no arguments.
    # 6. `obj` responds to `[]`: `field` is resolved as a dotted path.
    # 7. Otherwise `obj` is returned.
    #
    # @param field [#to_s, nil] column number, method name or dotted path
    # @param obj [Object] the record to read from
    # @return [Object, nil] the extracted value
    def get(field, obj)
      return obj unless field

      name  = field.to_s
      index = name.to_i # 0 for anything that does not start with a number

      if index > 0
        # Positive numbers are 1-based positions.
        columns = obj.is_a?(String) ? obj.split(separator) : obj
        columns[index - 1]
      elsif index < 0
        # Negative positions are not supported; hand the record back.
        obj
      elsif obj.is_a?(String) && obj =~ /^\s*\{/
        begin
          get_nested(field, MultiJson.load(obj))
        rescue MultiJson::DecodeError
          # Best effort: text that merely looked like JSON yields nil.
          nil
        end
      elsif !name.include?('.') && obj.respond_to?(name)
        # respond_to? only reports public methods, so public_send
        # reaches exactly the methods that check allows.
        obj.public_send(name)
      elsif obj.respond_to?(:[])
        get_nested(field, obj)
      else
        obj
      end
    end

    # Resolve a dot-separated path (e.g. "user.address.city") against a
    # nested structure, looking up each piece with `[]` as a String key.
    #
    # A leaf whose value is `false` is returned as `false`; only a
    # missing (nil) value, or an intermediate value that is nil/false
    # and therefore cannot be descended into, yields nil.
    #
    # @param fields [#to_s] dot-separated path
    # @param obj [#[]] structure to descend into
    # @return [Object, nil] the value at the path, or nil if absent
    def get_nested(fields, obj)
      parts = fields.to_s.split('.')
      field = parts.shift
      return unless field

      slice = obj[field]
      return slice if parts.empty? # keeps a legitimate `false` leaf
      return unless slice          # nothing to descend into

      get_nested(parts.join('.'), slice)
    end

  end
end
@@ -0,0 +1,7 @@
1
+ require 'wukong/widget/filters'
2
+ require 'wukong/widget/serializers'
3
+ require 'wukong/widget/operators'
4
+ require 'wukong/widget/reducers'
5
+ require 'wukong/widget/extract'
6
+ require 'wukong/widget/logger'
7
+ require 'wukong/widget/echo'
@@ -0,0 +1,18 @@
1
+ require 'spec_helper'
2
+ # require 'wukong'
3
+
4
+ # describe_example_script :fibonacci_series, 'dataflow/fibonacci_series.rb', examples_spec: true do
5
+ # subject{ Wukong.chain(:fibonacci_series) }
6
+
7
+ # it 'generates a fibonacci sequence' do
8
+ # subject.ticker.qty(12)
9
+ # # subject.output > subject.array_sink(name: :numbers)
10
+ # # subject.setup
11
+ # # subject.ticker.drive
12
+ # #
13
+ # # subject.numbers.records.should == [0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89]
14
+ # end
15
+
16
+ # it_generates_graphviz{|gv_filename| puts File.read(gv_filename) }
17
+
18
+ # end
@@ -0,0 +1,8 @@
1
+ # require 'spec_helper'
2
+ # require 'wukong'
3
+ #
4
+ # describe_example_script(:parsing, 'dataflow/parsing.rb') do
5
+ # it 'runs' do
6
+ # Wukong::LocalRunner.run(subject, :default)
7
+ # end
8
+ # end
@@ -0,0 +1,14 @@
1
+ require 'spec_helper'
2
+ # require 'wukong'
3
+
4
+ # describe_example_script(:parse_apache_logs, 'dataflow/parse_apache_logs.rb') do
5
+ # it 'runs' do
6
+ # subject = Wukong.dataflow(:parse_apache_logs)
7
+ # out, err = Gorillib::TestHelpers.capture_output do
8
+ # Wukong::LocalRunner.receive(:flow => subject) do
9
+ # run :default
10
+ # end
11
+ # end
12
+ # out.string.split("\n").first.should == "127.0.0.1 - - [10/Apr/2007:10:39:11 +0300] \"GET / HTTP/1.1\" 500 606 \"-\" \"Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.3) Gecko/20061201 Firefox/2.0.0.3 (Ubuntu-feisty)\"\t"
13
+ # end
14
+ # end
@@ -0,0 +1,34 @@
1
+ require 'spec_helper'
2
+ # require 'wukong'
3
+
4
+ # Hanuman::Graph.class_eval do
5
+ # def foo_graph(label, &block)
6
+ # stage(label, :_type => Hanuman::FooGraph, &block)
7
+ # end
8
+ # end
9
+ # class Hanuman::FooGraph < Hanuman::Graph
10
+ # # field :inputs, Gorillib::Collection, :of => Hanuman::InputSlot, :doc => 'inputs to this stage', :default => ->{ Gorillib::Collection.new }
11
+ # # field :outputs, Gorillib::Collection, :of => Hanuman::OutputSlot, :doc => 'outputs of this stage', :default => ->{ Gorillib::Collection.new }
12
+ #
13
+ # collection :inputs, Hanuman::InputSlot
14
+ #
15
+ # end
16
+
17
+ # describe 'example', :examples_spec do
18
+ # # describe_example_script(:simple, 'dataflow/simple.rb', :only => true) do
19
+ # # it 'runs' do
20
+ # # p subject
21
+ # # end
22
+ # # end
23
+
24
+ # it 'runs' do
25
+ # # load Pathname.path_to(:examples, 'dataflow/simple.rb')
26
+
27
+ # Wukong.dataflow(:bob) do
28
+ # ff = file_source(Pathname.path_to(:data, 'text/jabberwocky.txt')){ p self }
29
+
30
+
31
+ # end
32
+
33
+ # end
34
+ # end
@@ -0,0 +1,43 @@
1
+ require 'spec_helper'
2
+ # require 'wukong'
3
+
4
+ # describe_example_script(:telegram, 'dataflow/telegram.rb') do
5
+ # it 'runs' do
6
+ # Wukong::LocalRunner.run(subject, :default)
7
+ # end
8
+
9
+ # context 'Recompose processor' do
10
+ # subject{ Wukong::Widget::Recompose }
11
+ # its(:field_names){ should include(:break_length) }
12
+
13
+ # let(:words ){
14
+ # # 0 5 1 5 2 5 3 5 4 5 5 5 6 5 7 5 8
15
+ # %w[
16
+ # If names be not correct, language is not in accordance with
17
+ # the truth of things. If language be not in accordance with
18
+ # the truth of things, affairs cannot be carried on to success. ] }
19
+
20
+ # context '#process' do
21
+ # it 'breaks lines correctly' do
22
+ # (2..80).each do |len|
23
+ # # run the data flow into an array sink
24
+ # test_sink = Wukong::Sink::ArraySink.new
25
+ # rc = subject.new(:break_length => len, :output => test_sink )
26
+ # words.each{|word| rc.process(word) }
27
+ # rc.stop
28
+ # # start and end are correct
29
+ # test_sink.records.first.should =~ /^If/
30
+ # test_sink.records.last.should =~ /success\.$/
31
+ # # lines should be as long as possible, but not longer
32
+ # test_sink.records[0..-2].zip(test_sink.records[1..-1]) do |line, nextl|
33
+ # nextw = nextl.split[0]
34
+ # ((line.length <= len) || line !~ /\s/).should be_true
35
+ # (line.length + nextw.length + 1 > len).should be_true
36
+ # end
37
+ # end
38
+ # end
39
+
40
+ # end
41
+ # end
42
+
43
+ # end
@@ -0,0 +1,34 @@
1
+ # require 'spec_helper'
2
+ # require 'wukong'
3
+
4
+ # load Pathname.path_to(:examples, 'graph/minimum_spanning_tree.rb')
5
+
6
+ # describe 'Minimum Spanning Tree', :examples_spec, :helpers do
7
+
8
+ # context Wukong::Widget::DisjointForest do
9
+ # subject{ Wukong::Widget::DisjointForest.new }
10
+
11
+ # context 'operations' do
12
+ # before do
13
+ # %w[ AUS DFW ATL JFK SFO LGA LAX ].each{|el| subject.add el }
14
+ # subject.union('DFW', 'AUS')
15
+ # subject.union('ATL', 'JFK')
16
+ # subject.union('ATL', 'DFW')
17
+ # end
18
+
19
+ # context '#find' do
20
+ # it 'collapses elements into a shallow tree during a find' do
21
+ # subject.parent['ATL'].should == 'JFK'
22
+ # subject.parent['JFK'].should == 'AUS'
23
+ # subject.find('ATL').should == 'AUS'
24
+ # subject.parent['ATL'].should == 'AUS'
25
+ # end
26
+ # end
27
+ # context '#union' do
28
+ # it 'joins shallow tree to deep tree' do
29
+ # end
30
+ # end
31
+ # end
32
+
33
+ # end
34
+ # end
@@ -0,0 +1,16 @@
1
+ # require 'spec_helper'
2
+ # require 'wukong'
3
+
4
+ # load Pathname.path_to(:examples, 'munging/airline_flights/identifiers.rb')
5
+
6
+ # describe Airport::IdMapping, :only do
7
+ # it 'loads and reconciles' do
8
+ # described_class.load(Pathname.path_to(:data, 'airline_flights'))
9
+ # #
10
+ # Airport::IdMapping::ID_MAPPINGS.each do |identifier, hsh|
11
+ # hsh.each do |id, id_mapping|
12
+ # # puts [identifier, id, id_mapping.to_tsv].join("\t")
13
+ # end
14
+ # end
15
+ # end
16
+ # end
@@ -0,0 +1,202 @@
1
+ # require 'spec_helper'
2
+ # require 'wukong'
3
+ # require 'gorillib/datetime/parse'
4
+
5
+ # load Pathname.path_to(:examples, 'munging/airline_flights/models.rb')
6
+
7
+ # describe 'Airline Flight Delays Dataset' do
8
+ # let(:example_tuple ){ ["2007", "1", "1", "1", "1232", "1225", "1341", "1340", "WN", "2891", "N351", "69", "75", "54", "1", "7", "SMF", "ONT", "389", "4", "11", "0", "", "0", "0", "0", "0", "0", "0"] }
9
+ # let(:cancelled_tuple_a){ ["2007", "1", "1", "1", "NA", "2030", "NA", "2135", "WN", "2734", "0", "NA", "65", "NA", "NA", "NA", "SNA", "LAS", "226", "0", "0", "1", "A", "0", "0", "0", "0", "0", "0"] }
10
+ # let(:cancelled_tuple_c){ ["2007", "1", "4", "4", "NA", "2120", "NA", "2125", "WN", "1631", "0", "NA", "65", "NA", "NA", "NA", "PHX", "SAN", "304", "0", "0", "1", "C", "0", "0", "0", "0", "0", "0"] }
11
+ # let(:diverted_tuple ){ ["2007", "1", "12", "5", "1054", "1054", "NA", "1209", "EV", "4351", "N857AS", "NA", "135", "NA", "NA", "0", "ATL", "TUL", "674", "0", "11", "0", "", "1", "0", "0", "0", "0", "0"] }
12
+
13
+ # let(:raw_flight ){ RawAirlineFlight.from_tuple(*example_tuple) }
14
+ # let(:raw_cancelled ){ RawAirlineFlight.from_tuple(*cancelled_tuple_a) }
15
+ # let(:raw_diverted ){ RawAirlineFlight.from_tuple(*diverted_tuple) }
16
+ # let(:example_flight ){ raw_flight.to_airline_flight }
17
+ # let(:cancelled_flight ){ raw_cancelled.to_airline_flight }
18
+ # let(:diverted_flight ){ raw_diverted.to_airline_flight }
19
+
20
+ # let(:de_airports_filename ){ Pathname.path_to(:data, 'airline_flights/dataexpo_airports-raw.csv') }
21
+
22
+ # let(:raw_airports_filename ){ Pathname.path_to(:data, 'airline_flights/openflights_airports-raw.csv') }
23
+ # let(:raw_airlines_filename ){ Pathname.path_to(:data, 'airline_flights/openflights_airlines-raw-sample.csv') }
24
+
25
+ # let(:example_flight_attrs) { {
26
+ # flight_datestr: '20070101', unique_carrier: "WN", flight_num: 2891,
27
+ # from_airport: "SMF", into_airport: "ONT", tail_num: "N351", distance_km: 626, day_of_week: 1,
28
+ # crs_dep_itime: 1167654300, crs_arr_itime: 1167658800,
29
+ # act_dep_itime: 1167654720, act_arr_itime: 1167658860,
30
+ # crs_dep_tod: "1225", crs_arr_tod: "1340",
31
+ # act_dep_tod: "1232", act_arr_tod: "1341",
32
+ # crs_duration: 75, act_duration: 69, air_duration: 54, taxi_in_duration: 4, taxi_out_duration: 11,
33
+ # is_diverted: false, is_cancelled: false, cancellation_code: "Z",
34
+ # dep_delay: 7, arr_delay: 1, carrier_delay: 0, weather_delay: 0, nas_delay: 0, security_delay: 0, late_aircraft_delay: 0,
35
+ # } }
36
+
37
+ # describe RawAirlineFlight do
38
+ # subject{ raw_flight }
39
+
40
+ # it 'loads from a hash' do
41
+ # p subject.compact_attributes
42
+ # subject.compact_attributes.should == {
43
+ # date_year: 2007, date_month: 1, date_day: 1, day_of_week: 1,
44
+ # act_arr_tod: "1341", act_dep_tod: "1232",
45
+ # crs_arr_tod: "1340", crs_dep_tod: "1225",
46
+ # # act_arr_itime: 1167658860, act_dep_itime: 1167654720,
47
+ # # crs_arr_itime: 1167658800, crs_dep_itime: 1167654300,
48
+ # unique_carrier: "WN", flight_num: 2891, tail_num: "N351",
49
+ # act_duration: 69, crs_duration: 75, air_duration: 54, arr_delay: 1, dep_delay: 7,
50
+ # from_airport: "SMF", into_airport: "ONT", distance_mi: 389, taxi_in_duration: 4, taxi_out_duration: 11,
51
+ # is_cancelled: false, cancellation_code: "Z", is_diverted: false,
52
+ # carrier_delay: 0, weather_delay: 0, nas_delay: 0, security_delay: 0, late_aircraft_delay: 0,
53
+ # }
54
+ # end
55
+
56
+ # it 'loads cancelled flights OK' do
57
+ # # ff = RawAirlineFlight.fields[:act_dep_itime].type
58
+ # flight = described_class.from_tuple(*cancelled_tuple_a)
59
+ # p flight.compact_attributes
60
+ # flight.compact_attributes.should == {
61
+ # date_year: 2007, date_month: 1, date_day: 1, day_of_week: 1,
62
+ # act_arr_tod: nil, act_dep_tod: nil,
63
+ # crs_arr_tod: "2135", crs_dep_tod: "2030",
64
+ # # act_arr_itime: nil, act_dep_itime: nil,
65
+ # # crs_arr_itime: 1167687300, crs_dep_itime: 1167683400,
66
+ # unique_carrier: "WN", flight_num: 2734, tail_num: nil,
67
+ # act_duration: nil, crs_duration: 65, air_duration: nil, arr_delay: nil, dep_delay: nil,
68
+ # from_airport: "SNA", into_airport: "LAS", distance_mi: 226, taxi_in_duration: 0, taxi_out_duration: 0,
69
+ # is_cancelled: true, cancellation_code: "A", is_diverted: false,
70
+ # carrier_delay: 0, weather_delay: 0, nas_delay: 0, security_delay: 0, late_aircraft_delay: 0,
71
+ # }
72
+ # end
73
+
74
+ # it 'loads diverted flights OK' do
75
+ # # ff = RawAirlineFlight.fields[:act_dep_itime].type
76
+ # flight = described_class.from_tuple(*diverted_tuple)
77
+ # p flight.compact_attributes
78
+ # flight.compact_attributes.should == {
79
+ # date_year: 2007, date_month: 1, date_day: 12, day_of_week: 5,
80
+ # act_arr_tod: nil, act_dep_tod: "1054",
81
+ # crs_arr_tod: "1209", crs_dep_tod: "1054",
82
+ # # act_arr_itime: nil, act_dep_itime: 1168599240,
83
+ # # crs_arr_itime: 1168603740, crs_dep_itime: 1168599240,
84
+ # unique_carrier: "EV", flight_num: 4351, tail_num: "N857AS",
85
+ # act_duration: nil, crs_duration: 135, air_duration: nil, arr_delay: nil, dep_delay: 0,
86
+ # from_airport: "ATL", into_airport: "TUL", distance_mi: 674, taxi_in_duration: 0, taxi_out_duration: 11,
87
+ # is_cancelled: false, cancellation_code: "Z", is_diverted: true,
88
+ # carrier_delay: 0, weather_delay: 0, nas_delay: 0, security_delay: 0, late_aircraft_delay: 0,
89
+ # }
90
+ # end
91
+
92
+ # it 'does dates right' do
93
+ # { normal: [example_tuple, raw_flight],
94
+ # cancelled: [cancelled_tuple_a, raw_cancelled],
95
+ # diverted: [diverted_tuple, raw_diverted],
96
+ # }.each do |label, (raw_values, raw_flight)|
97
+ # [ [raw_flight.act_dep_itime, raw_values[4] ],
98
+ # [raw_flight.crs_dep_itime, raw_values[5] ],
99
+ # [raw_flight.act_arr_itime, raw_values[6] ],
100
+ # [raw_flight.crs_arr_itime, raw_values[7] ],
101
+ # ].each do |itime, hhmm|
102
+ # next unless itime
103
+ # tm = Time.at(itime).utc
104
+ # (tm.hour * 100 + tm.min).to_s.should == hhmm
105
+ # end
106
+ # end
107
+ # end
108
+
109
+ # it 'receives idempotently' do
110
+ # subject.should == RawAirlineFlight.receive(subject.compact_attributes)
111
+ # end
112
+
113
+ # it '#to_airline_flight' do
114
+ # flight = subject.to_airline_flight
115
+ # flight.should be_a(AirlineFlight)
116
+ # flight.compact_attributes.should == example_flight_attrs
117
+ # end
118
+ # end
119
+
120
+ # describe AirlineFlight do
121
+ # subject{ example_flight }
122
+
123
+ # it "makes sense" do
124
+ # { normal: example_flight, cancelled: cancelled_flight, diverted: diverted_flight
125
+ # }.each do |label, flight|
126
+ # linted = subject.lint
127
+ # p [label, linted, flight] unless linted.values.all?
128
+ # linted.values.should be_all
129
+ # end
130
+ # end
131
+
132
+ # it 'has correct field alignment' do
133
+ # described_class.field_names.should == example_flight_attrs.keys
134
+ # described_class.fields.values.map(&:position).should == (0..30).to_a
135
+ # end
136
+
137
+ # it 'calculates local times correctly' do
138
+ # Airport.load(raw_airports_filename)
139
+ # Airport::AIRPORTS.each{|id,airport| puts airport.to_tsv }
140
+ # end
141
+
142
+ # end
143
+
144
+ # describe 'parsing raw' do
145
+ # it 'works' do
146
+ # raw_file = File.open(raw_airlines_filename)
147
+ # raw_file.readline
148
+ # puts AirlineFlight.field_names.map{|fn| fn[0..6] }.join("\t")
149
+ # raw_file.each do |line|
150
+ # tuple = line.split(',')
151
+ # # next unless tuple[23] == "1"
152
+ # raw_flight = RawAirlineFlight.from_tuple(*tuple)
153
+ # flight = raw_flight.to_airline_flight
154
+ # # if not flight.lint.values.all?
155
+ # # p flight.lint.values
156
+ # puts flight.to_tsv
157
+ # # p [
158
+ # # [raw_flight.crs_dep_itime, tuple[5] ],
159
+ # # [raw_flight.crs_arr_itime, tuple[7] ],
160
+ # # (raw_flight.crs_arr_itime - raw_flight.crs_arr_itime),
161
+ # # (raw_flight.crs_arr_itime - raw_flight.crs_dep_itime)/60.0,
162
+ # # raw_flight.crs_duration
163
+ # # ]
164
+ # # end
165
+ # end
166
+ # end
167
+ # end
168
+
169
+ # describe RawDataexpoAirport do
170
+ # it 'works' do
171
+ # puts described_class.field_names.map{|fn| fn[0..6] }.join("\t")
172
+ # raw_airports = RawDataexpoAirport.load_csv(de_airports_filename)
173
+ # raw_airports.each do |airport|
174
+ # puts airport.to_tsv
175
+ # end
176
+ # end
177
+ # end
178
+
179
+ # describe RawOpenflightAirport do
180
+ # it 'works' do
181
+ # puts described_class.field_names.join("\t") # .map{|fn| fn[0..6] }.join("\t")
182
+ # raw_airports = described_class.load_csv(raw_airports_filename)
183
+ # raw_airports.each do |airport|
184
+ # # puts airport.to_tsv
185
+ # linted = airport.lint
186
+ # puts [airport.iata, airport.icao, linted.inspect, airport.to_tsv, ].join("\t") if linted.present?
187
+ # end
188
+ # end
189
+ # end
190
+
191
+ # describe Airport do
192
+ # it 'loads and reconciles' do
193
+ # Airport.load(raw_airports_filename, de_airports_filename)
194
+ # Airport::AIRPORTS.each{|id,airport|
195
+ # #puts airport.to_tsv
196
+ # linted = airport.lint
197
+ # warn [airport.iata, airport.icao, airport.de_iata, "%-25s" % airport.name, linted.inspect].join("\t") if linted.present?
198
+ # }
199
+ # end
200
+ # end
201
+
202
+ # end
@@ -0,0 +1,18 @@
1
+ require 'spec_helper'
2
+ # require 'wukong'
3
+
4
+ # describe_example_script(:pig_latin, 'text/pig_latin.rb') do
5
+
6
+ # context 'processor' do
7
+ # subject{ Wukong::Widget::PigLatinize.new }
8
+ # it 'breaks text into pig latin' do
9
+ # subject.should_receive(:emit).with("Iway indfay ethay astramipay otay ebay ethay ostmay ensualsay ofway allway ethay altedsay uredcay eatsmay.")
10
+ # subject.process("I find the pastrami to be the most sensual of all the salted cured meats.")
11
+ # end
12
+ # end
13
+
14
+ # it 'runs' do
15
+ # Wukong::LocalRunner.run(ExampleUniverse.dataflow(:pig_latin), :default)
16
+ # end
17
+
18
+ # end