ul-wukong 4.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (261) hide show
  1. checksums.yaml +15 -0
  2. data/.gitignore +60 -0
  3. data/.gitmodules +6 -0
  4. data/.rspec +2 -0
  5. data/.travis.yml +19 -0
  6. data/.yardopts +6 -0
  7. data/CHANGELOG.md +7 -0
  8. data/Gemfile +17 -0
  9. data/Guardfile +12 -0
  10. data/LICENSE.md +95 -0
  11. data/NOTES-travis.md +31 -0
  12. data/README-old.md +422 -0
  13. data/README.md +1308 -0
  14. data/Rakefile +28 -0
  15. data/TODO.md +99 -0
  16. data/bin/cutc +30 -0
  17. data/bin/cuttab +5 -0
  18. data/bin/greptrue +6 -0
  19. data/bin/md5sort +20 -0
  20. data/bin/setcat +11 -0
  21. data/bin/tabchar +5 -0
  22. data/bin/uniq-ord +59 -0
  23. data/bin/uniqc +3 -0
  24. data/bin/wu +34 -0
  25. data/bin/wu-clean-encoding +31 -0
  26. data/bin/wu-date +13 -0
  27. data/bin/wu-datetime +13 -0
  28. data/bin/wu-hist +3 -0
  29. data/bin/wu-lign +186 -0
  30. data/bin/wu-local +4 -0
  31. data/bin/wu-plus +9 -0
  32. data/bin/wu-source +5 -0
  33. data/bin/wu-sum +31 -0
  34. data/diagrams/wu_local.dot +39 -0
  35. data/diagrams/wu_local.dot.png +0 -0
  36. data/examples/Gemfile +38 -0
  37. data/examples/README.md +9 -0
  38. data/examples/basic/string_reverser.rb +23 -0
  39. data/examples/basic/tiny_count.rb +8 -0
  40. data/examples/basic/word_count/accumulator.rb +26 -0
  41. data/examples/basic/word_count/tokenizer.rb +13 -0
  42. data/examples/basic/word_count/word_count.rb +6 -0
  43. data/examples/dataflow/scraper_macro_flow.rb +28 -0
  44. data/examples/deploy_pack/Gemfile +6 -0
  45. data/examples/deploy_pack/README.md +6 -0
  46. data/examples/deploy_pack/a/b/c/.gitkeep +0 -0
  47. data/examples/deploy_pack/app/processors/string_reverser.rb +5 -0
  48. data/examples/deploy_pack/config/environment.rb +1 -0
  49. data/examples/dsl/dataflow/fibonacci_series.rb +101 -0
  50. data/examples/dsl/dataflow/scraper_macro_flow.rb +28 -0
  51. data/examples/dsl/dataflow/simple.rb +12 -0
  52. data/examples/dsl/dataflow/telegram.rb +45 -0
  53. data/examples/dsl/workflow/cherry_pie.dot +97 -0
  54. data/examples/dsl/workflow/cherry_pie.md +104 -0
  55. data/examples/dsl/workflow/cherry_pie.png +0 -0
  56. data/examples/dsl/workflow/cherry_pie.rb +101 -0
  57. data/examples/empty/.gitkeep +0 -0
  58. data/examples/examples_helper.rb +9 -0
  59. data/examples/geo.rb +4 -0
  60. data/examples/geo/geo_grids.numbers +0 -0
  61. data/examples/geo/geolocated.rb +331 -0
  62. data/examples/geo/quadtile.rb +69 -0
  63. data/examples/geo/spec/geolocated_spec.rb +247 -0
  64. data/examples/geo/tile_fetcher.rb +77 -0
  65. data/examples/graph/implied_geolocation/README.md +63 -0
  66. data/examples/graph/minimum_spanning_tree/airfares_graphviz.rb +73 -0
  67. data/examples/improver/tweet_summary.rb +73 -0
  68. data/examples/loadable.rb +2 -0
  69. data/examples/munging/airline_flights/airline_flights.rake +83 -0
  70. data/examples/munging/airline_flights/airplane.rb +0 -0
  71. data/examples/munging/airline_flights/airport_id_unification.rb +129 -0
  72. data/examples/munging/airline_flights/airport_ok_chars.rb +4 -0
  73. data/examples/munging/airline_flights/indexable.rb +75 -0
  74. data/examples/munging/airline_flights/indexable_spec.rb +90 -0
  75. data/examples/munging/airline_flights/reconcile_airports.rb +142 -0
  76. data/examples/munging/airline_flights/tasks.rake +83 -0
  77. data/examples/munging/airline_flights/topcities.rb +167 -0
  78. data/examples/munging/geo/geo_json.rb +54 -0
  79. data/examples/munging/geo/geo_models.rb +69 -0
  80. data/examples/munging/geo/geonames_models.rb +107 -0
  81. data/examples/munging/geo/iso_codes.rb +172 -0
  82. data/examples/munging/geo/reconcile_countries.rb +124 -0
  83. data/examples/munging/geo/tasks.rake +71 -0
  84. data/examples/munging/wikipedia/articles/extract_articles-parsed.rb +79 -0
  85. data/examples/munging/wikipedia/articles/extract_articles-templated.rb +136 -0
  86. data/examples/munging/wikipedia/articles/textualize_articles.rb +54 -0
  87. data/examples/munging/wikipedia/articles/verify_structure.rb +43 -0
  88. data/examples/munging/wikipedia/articles/wp2txt-LICENSE.txt +22 -0
  89. data/examples/munging/wikipedia/articles/wp2txt_article.rb +259 -0
  90. data/examples/munging/wikipedia/articles/wp2txt_utils.rb +452 -0
  91. data/examples/munging/wikipedia/dbpedia/dbpedia_common.rb +5 -0
  92. data/examples/munging/wikipedia/dbpedia/dbpedia_extract_geocoordinates.rb +78 -0
  93. data/examples/munging/wikipedia/dbpedia/extract_links-cruft.rb +66 -0
  94. data/examples/munging/wikipedia/dbpedia/extract_links.rb +260 -0
  95. data/examples/munging/wikipedia/dbpedia/sameas_extractor.rb +20 -0
  96. data/examples/rake_helper.rb +97 -0
  97. data/examples/ruby_project/Gemfile +6 -0
  98. data/examples/ruby_project/README.md +6 -0
  99. data/examples/ruby_project/a/b/c/.gitkeep +0 -0
  100. data/examples/server_logs/geo_ip_mapping/munge_geolite.rb +82 -0
  101. data/examples/server_logs/logline.rb +95 -0
  102. data/examples/server_logs/models.rb +66 -0
  103. data/examples/server_logs/page_counts.pig +48 -0
  104. data/examples/server_logs/server_logs-01-parse-script.rb +13 -0
  105. data/examples/server_logs/server_logs-02-histograms-full.rb +33 -0
  106. data/examples/server_logs/server_logs-02-histograms-mapper.rb +14 -0
  107. data/examples/server_logs/server_logs-03-breadcrumbs-full.rb +71 -0
  108. data/examples/server_logs/server_logs-04-page_page_edges-full.rb +40 -0
  109. data/examples/serverlogs/geo_ip_mapping/munge_geolite.rb +82 -0
  110. data/examples/serverlogs/models/logline.rb +102 -0
  111. data/examples/serverlogs/parser/apache_parser_widget.rb +46 -0
  112. data/examples/serverlogs/visit_paths/common.rb +4 -0
  113. data/examples/serverlogs/visit_paths/page_counts.pig +48 -0
  114. data/examples/serverlogs/visit_paths/serverlogs-01-parse-script.rb +11 -0
  115. data/examples/serverlogs/visit_paths/serverlogs-02-histograms-full.rb +31 -0
  116. data/examples/serverlogs/visit_paths/serverlogs-02-histograms-mapper.rb +12 -0
  117. data/examples/serverlogs/visit_paths/serverlogs-03-breadcrumbs-full.rb +67 -0
  118. data/examples/serverlogs/visit_paths/serverlogs-04-page_page_edges-full.rb +38 -0
  119. data/examples/splitter.rb +94 -0
  120. data/examples/string_reverser.rb +7 -0
  121. data/examples/text/pig_latin/pig_latinizer.rb +35 -0
  122. data/examples/text/pig_latin/pig_latinizer_widget.rb +16 -0
  123. data/examples/text/regional_flavor/README.md +14 -0
  124. data/examples/text/regional_flavor/article_wordbags.pig +39 -0
  125. data/examples/text/regional_flavor/j01-article_wordbags.rb +4 -0
  126. data/examples/text/regional_flavor/simple_pig_script.pig +27 -0
  127. data/examples/twitter.rb +5 -0
  128. data/lib/hanuman.rb +36 -0
  129. data/lib/hanuman/graph.rb +97 -0
  130. data/lib/hanuman/graphvizzer.rb +206 -0
  131. data/lib/hanuman/graphvizzer/gv_models.rb +161 -0
  132. data/lib/hanuman/graphvizzer/gv_presenter.rb +97 -0
  133. data/lib/hanuman/link.rb +35 -0
  134. data/lib/hanuman/registry.rb +46 -0
  135. data/lib/hanuman/stage.rb +128 -0
  136. data/lib/hanuman/tree.rb +67 -0
  137. data/lib/wu/geo.rb +4 -0
  138. data/lib/wu/geo/geo_grids.numbers +0 -0
  139. data/lib/wu/geo/geolocated.rb +331 -0
  140. data/lib/wu/geo/quadtile.rb +69 -0
  141. data/lib/wu/graph/union_find.rb +62 -0
  142. data/lib/wu/model/reconcilable.rb +63 -0
  143. data/lib/wu/munging.rb +71 -0
  144. data/lib/wu/social/models/twitter.rb +31 -0
  145. data/lib/wu/wikipedia/models.rb +20 -0
  146. data/lib/wukong.rb +54 -0
  147. data/lib/wukong/dataflow.rb +43 -0
  148. data/lib/wukong/doc_helpers.rb +14 -0
  149. data/lib/wukong/doc_helpers/dataflow_handler.rb +29 -0
  150. data/lib/wukong/doc_helpers/field_handler.rb +91 -0
  151. data/lib/wukong/doc_helpers/processor_handler.rb +29 -0
  152. data/lib/wukong/driver.rb +214 -0
  153. data/lib/wukong/driver/event_machine_driver.rb +15 -0
  154. data/lib/wukong/driver/wiring.rb +68 -0
  155. data/lib/wukong/local.rb +42 -0
  156. data/lib/wukong/local/runner.rb +96 -0
  157. data/lib/wukong/local/stdio_driver.rb +104 -0
  158. data/lib/wukong/logger.rb +102 -0
  159. data/lib/wukong/model/faker.rb +136 -0
  160. data/lib/wukong/model/flatpack_parser/flat.rb +60 -0
  161. data/lib/wukong/model/flatpack_parser/flatpack.rb +4 -0
  162. data/lib/wukong/model/flatpack_parser/lang.rb +46 -0
  163. data/lib/wukong/model/flatpack_parser/parser.rb +55 -0
  164. data/lib/wukong/model/flatpack_parser/tokens.rb +130 -0
  165. data/lib/wukong/plugin.rb +48 -0
  166. data/lib/wukong/processor.rb +110 -0
  167. data/lib/wukong/rake_helper.rb +6 -0
  168. data/lib/wukong/runner.rb +169 -0
  169. data/lib/wukong/runner/boot_sequence.rb +123 -0
  170. data/lib/wukong/runner/code_loader.rb +52 -0
  171. data/lib/wukong/runner/command_runner.rb +44 -0
  172. data/lib/wukong/runner/deploy_pack_loader.rb +75 -0
  173. data/lib/wukong/runner/help_message.rb +42 -0
  174. data/lib/wukong/source.rb +33 -0
  175. data/lib/wukong/source/source_driver.rb +74 -0
  176. data/lib/wukong/source/source_runner.rb +38 -0
  177. data/lib/wukong/spec_helpers.rb +74 -0
  178. data/lib/wukong/spec_helpers/integration_tests.rb +150 -0
  179. data/lib/wukong/spec_helpers/integration_tests/integration_test_matchers.rb +207 -0
  180. data/lib/wukong/spec_helpers/integration_tests/integration_test_runner.rb +97 -0
  181. data/lib/wukong/spec_helpers/shared_examples.rb +22 -0
  182. data/lib/wukong/spec_helpers/unit_tests.rb +135 -0
  183. data/lib/wukong/spec_helpers/unit_tests/unit_test_driver.rb +132 -0
  184. data/lib/wukong/spec_helpers/unit_tests/unit_test_matchers.rb +169 -0
  185. data/lib/wukong/spec_helpers/unit_tests/unit_test_runner.rb +60 -0
  186. data/lib/wukong/version.rb +3 -0
  187. data/lib/wukong/widget/echo.rb +55 -0
  188. data/lib/wukong/widget/extract.rb +122 -0
  189. data/lib/wukong/widget/filters.rb +452 -0
  190. data/lib/wukong/widget/logger.rb +56 -0
  191. data/lib/wukong/widget/operators.rb +82 -0
  192. data/lib/wukong/widget/reducers.rb +10 -0
  193. data/lib/wukong/widget/reducers/accumulator.rb +73 -0
  194. data/lib/wukong/widget/reducers/bin.rb +368 -0
  195. data/lib/wukong/widget/reducers/count.rb +73 -0
  196. data/lib/wukong/widget/reducers/group.rb +128 -0
  197. data/lib/wukong/widget/reducers/group_concat.rb +98 -0
  198. data/lib/wukong/widget/reducers/improver.rb +71 -0
  199. data/lib/wukong/widget/reducers/join_xml.rb +37 -0
  200. data/lib/wukong/widget/reducers/moments.rb +72 -0
  201. data/lib/wukong/widget/reducers/sort.rb +180 -0
  202. data/lib/wukong/widget/reducers/uniq.rb +91 -0
  203. data/lib/wukong/widget/serializers.rb +317 -0
  204. data/lib/wukong/widget/utils.rb +46 -0
  205. data/lib/wukong/widgets.rb +7 -0
  206. data/spec/examples/dataflow/fibonacci_series_spec.rb +18 -0
  207. data/spec/examples/dataflow/parse_apache_logs_spec.rb +8 -0
  208. data/spec/examples/dataflow/parsing_spec.rb +14 -0
  209. data/spec/examples/dataflow/simple_spec.rb +34 -0
  210. data/spec/examples/dataflow/telegram_spec.rb +43 -0
  211. data/spec/examples/graph/minimum_spanning_tree_spec.rb +34 -0
  212. data/spec/examples/munging/airline_flights/identifiers_spec.rb +16 -0
  213. data/spec/examples/munging/airline_flights_spec.rb +202 -0
  214. data/spec/examples/text/pig_latin_spec.rb +18 -0
  215. data/spec/examples/workflow/cherry_pie_spec.rb +36 -0
  216. data/spec/hanuman/graph_spec.rb +119 -0
  217. data/spec/hanuman/hanuman_spec.rb +10 -0
  218. data/spec/hanuman/registry_spec.rb +123 -0
  219. data/spec/hanuman/stage_spec.rb +81 -0
  220. data/spec/hanuman/tree_spec.rb +119 -0
  221. data/spec/spec.opts +1 -0
  222. data/spec/spec_helper.rb +43 -0
  223. data/spec/support/example_test_helpers.rb +95 -0
  224. data/spec/support/hanuman_test_helpers.rb +92 -0
  225. data/spec/support/integration_helper.rb +38 -0
  226. data/spec/support/model_test_helpers.rb +115 -0
  227. data/spec/support/shared_context_for_graphs.rb +57 -0
  228. data/spec/support/shared_context_for_reducers.rb +37 -0
  229. data/spec/support/shared_examples_for_builders.rb +94 -0
  230. data/spec/support/shared_examples_for_shortcuts.rb +57 -0
  231. data/spec/wu/model/reconcilable_spec.rb +152 -0
  232. data/spec/wukong/dataflow_spec.rb +87 -0
  233. data/spec/wukong/driver_spec.rb +154 -0
  234. data/spec/wukong/local/runner_spec.rb +29 -0
  235. data/spec/wukong/local/stdio_driver_spec.rb +73 -0
  236. data/spec/wukong/local_spec.rb +6 -0
  237. data/spec/wukong/logger_spec.rb +49 -0
  238. data/spec/wukong/model/faker_spec.rb +132 -0
  239. data/spec/wukong/processor_spec.rb +21 -0
  240. data/spec/wukong/runner_spec.rb +132 -0
  241. data/spec/wukong/source_spec.rb +6 -0
  242. data/spec/wukong/widget/extract_spec.rb +101 -0
  243. data/spec/wukong/widget/filters_spec.rb +79 -0
  244. data/spec/wukong/widget/logger_spec.rb +23 -0
  245. data/spec/wukong/widget/operators_spec.rb +25 -0
  246. data/spec/wukong/widget/reducers/bin_spec.rb +92 -0
  247. data/spec/wukong/widget/reducers/count_spec.rb +11 -0
  248. data/spec/wukong/widget/reducers/group_spec.rb +21 -0
  249. data/spec/wukong/widget/reducers/join_xml_spec.rb +25 -0
  250. data/spec/wukong/widget/reducers/moments_spec.rb +36 -0
  251. data/spec/wukong/widget/reducers/sort_spec.rb +26 -0
  252. data/spec/wukong/widget/reducers/uniq_spec.rb +14 -0
  253. data/spec/wukong/widget/serializers_spec.rb +114 -0
  254. data/spec/wukong/widget/sink_spec.rb +19 -0
  255. data/spec/wukong/widget/source_spec.rb +65 -0
  256. data/spec/wukong/wu-local_spec.rb +109 -0
  257. data/spec/wukong/wu-source_spec.rb +32 -0
  258. data/spec/wukong/wu_spec.rb +14 -0
  259. data/spec/wukong/wukong_spec.rb +10 -0
  260. data/wukong.gemspec +35 -0
  261. metadata +465 -0
@@ -0,0 +1,123 @@
1
+ require_relative('help_message')
2
module Wukong
  class Runner

    # Describes how a Runner boots.  Booting walks through these
    # phases, each implemented by a method of this module:
    #
    # * #load      -- loads all application code
    # * #configure -- configures settings from core Wukong, any loaded plugins, and any application code
    # * #resolve   -- resolves settings
    # * #setup     -- boots core Wukong and all loaded plugins
    # * #validate  -- validates command-line args
    # * #run       -- starts the runner running
    #
    # Every phase can be overridden on its own, so different kinds of
    # runners can customize exactly the step they care about.
    module BootSequence

      include HelpMessage

      # Boot this Runner by calling, in order:
      #
      # * #load
      # * #configure
      # * #resolve
      # * #setup
      # * #validate
      # * #run or #die
      #
      # When `override_settings` is given it is merged over the
      # Runner's settings after #setup has run (handy for unit tests
      # that inject settings outside of the usual workflow).
      #
      # If `--help` was given, the help message is printed and the
      # process exits instead of validating and running.
      #
      # @param [Configliere::Param] override_settings
      def boot!(override_settings=nil)
        load
        configure
        resolve
        setup
        settings.merge!(override_settings) if override_settings

        if help_given?
          dump_help_and_exit!
        elsif validate
          run
        else
          die("Invalid arguments")
        end
      end

      private

      # Loads all code this Runner needs:
      #
      # * any code associated with being inside of a deploy pack
      # * any code passed in as arguments on the command-line
      #
      # NOTE: within an including class this shadows Kernel#load.
      def load
        load_deploy_pack
        load_args
      end

      # Prepares the settings: enables command-line handling, attaches
      # this Runner's description and usage (when present), and lets
      # every plugin add its own definitions.
      def configure
        settings.use(:commandline)
        settings.description = description if description
        usage_text = usage
        # Expose the usage string through a `usage` method on the
        # settings object itself.
        settings.define_singleton_method(:usage) { usage_text } if usage_text
        Wukong.configure_plugins(settings, program_name)
      end

      # Resolves the settings, first stripping the `--help` option.
      #
      # RuntimeError and SystemExit raised while resolving are
      # re-raised wrapped in an Error.
      #
      # @return [true]
      def resolve
        strip_help_param!
        settings.resolve!
        true
      rescue RuntimeError, SystemExit => e
        raise Error.new(e)
      end

      # Performs any setup needed before #run.
      #
      # Boots all plugins by default.  If you override this method,
      # either call `super` or boot the plugins yourself.
      def setup
        Wukong.boot_plugins(settings, root)
      end

      # Validates the command-line args.  Raise a Wukong::Error here
      # to terminate execution with a specific or custom error.
      #
      # Return false-like to terminate with a generic argument error.
      #
      # @return [true, false]
      def validate
        true
      end

      # Run this runner.
      #
      # Does nothing by default; override it in your own Runner class.
      def run
      end

      # Kill this runner with the given error `message` and exit
      # `code` by delegating to the class-level `die`.
      #
      # @param [String] message
      # @param [Integer] code
      def die(message=nil, code=126)
        self.class.die(message, code)
      end

    end
  end
end
@@ -0,0 +1,52 @@
1
module Wukong
  class Runner

    # Defines methods to help a Runner class load code passed in
    # dynamically on the command-line.
    #
    # The default behavior of code in this module is to load any Ruby
    # files (ending with `.rb`) passed in the command-line.
    module CodeLoader

      # Loads every file returned by #args_to_load, in order.
      #
      # @raise [Wukong::Error] if any of the files fails to load
      def load_args
        (args_to_load || []).each { |path| load_ruby_file(path) }
      end

      private

      # The additional code that we found out about on the
      # command-line.  Override this to change which paths get loaded.
      #
      # @return [Array<String>] paths to load culled from the ARGV.
      def args_to_load
        ruby_file_args || []
      end

      # Returns all pre-resolved arguments which are Ruby files: those
      # whose text ends with `.rb` and which are not `--` options.
      #
      # The whole argument is examined (rather than each line of it),
      # so an argument with an embedded newline is only selected when
      # the argument as a whole ends with `.rb`.
      #
      # @return [Array<String>]
      def ruby_file_args
        ARGV.select do |arg|
          name = arg.to_s
          name.end_with?('.rb') && !name.start_with?('--')
        end
      end

      # Loads a single Ruby file, capturing LoadError and SyntaxError
      # and raising Wukong::Error instead (so it can be easily captured
      # by the Runner).  Does nothing when `path` is nil.
      #
      # Kernel.load is called explicitly because a Runner may define
      # its own `load` method.
      #
      # @param [String] path
      # @raise [Wukong::Error] if there is an error
      def load_ruby_file(path)
        return unless path
        begin
          Kernel.load path
        rescue LoadError, SyntaxError => e
          raise Error.new(e)
        end
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
module Wukong
  class Runner

    # Provides methods for executing commandlines.
    #
    # NOTE(review): the command parts are joined into one string and
    # run through backticks, so they are interpreted by the shell.
    # Callers are responsible for quoting/escaping any untrusted part.
    module CommandRunner

      private

      # Execute a command composed of the given parts.  Blank parts
      # are dropped and the rest are joined with a backslash-newline
      # so the command reads as one continued line.
      #
      # Will print the command instead if the <tt>--dry_run</tt>
      # option was given.
      #
      # Will *not* raise an error if the command fails.
      #
      # @param [Array<String>] argv
      def execute_command(*argv)
        command = argv.flatten.reject(&:blank?).join(" \\\n ")
        if settings[:dry_run]
          log.info("Dry run:")
          puts command
        else
          output = `#{command}`
          puts output unless output.empty?
        end
      end

      # Execute a command composed of the given parts.
      #
      # Will print the command instead if the <tt>--dry_run</tt>
      # option was given.
      #
      # *Will* raise an error if the command fails.
      #
      # @param [Array<String>] argv
      # @raise [Wukong::Error] if the command ran and did not succeed
      def execute_command!(*argv)
        execute_command(argv)
        # Nothing was run during a dry run, so `$?` is either nil or
        # left over from some earlier command; don't judge by it.
        return if settings[:dry_run]
        status = $?
        raise Error.new("Command failed!") unless status && status.success?
      end

    end

  end
end
@@ -0,0 +1,75 @@
1
module Wukong
  class Runner

    # Lets Wukong bootstrap by requiring an enclosing deploy pack's
    # environment file if available.
    #
    # We use a simple heuristic (presence of 'Gemfile' and
    # 'config/environment.rb' in a non-root parent directory) to
    # determine whether or not we are in a deploy pack.
    module DeployPackLoader

      # Load the actual deploy pack environment, if we are inside a
      # deploy pack.  Load errors are not swallowed.
      def load_deploy_pack
        load_ruby_file(environment_file) if in_deploy_pack?
      end

      # Is execution likely happening within a deploy pack?  The
      # answer is computed once and then remembered.
      #
      # See Wukong::Deploy for more information on deploy packs.
      #
      # @return [true, false]
      def in_deploy_pack?
        return @in_deploy_pack unless @in_deploy_pack.nil?
        @in_deploy_pack = (deploy_pack_dir != '/')
      end

      # Have we already loaded the environment of a deploy pack?
      # True only when we are in a deploy pack and the
      # `::Wukong::Deploy` constant is defined.
      #
      # See Wukong::Deploy for more information on deploy packs.
      #
      # @return [true, false]
      def loaded_deploy_pack?
        # `defined?` yields a String or nil; coerce to a real boolean
        # to honor the documented return type.
        in_deploy_pack? && !!defined?(::Wukong::Deploy)
      end

      # The default environment file that will be loaded when
      # booting.
      #
      # @return [String]
      def environment_file
        File.join(deploy_pack_dir, 'config', 'environment.rb')
      end

      # Return the directory of the enclosing deploy pack.
      #
      # The directory of `ENV["BUNDLE_GEMFILE"]` wins whenever it looks
      # like a deploy pack.  Otherwise the working directory and its
      # ancestors are searched; the outcome of that search (a deploy
      # pack or the root) is remembered for subsequent calls.
      #
      # Will return the root ('/') if no deeper directory is
      # identified as a deploy pack.
      #
      # @return [String]
      def deploy_pack_dir
        bundle_gemfile = ENV["BUNDLE_GEMFILE"]
        if bundle_gemfile
          bundle_dir = File.dirname(bundle_gemfile)
          return bundle_dir if is_deploy_pack_dir?(bundle_dir)
        end
        @deploy_pack_dir ||= find_enclosing_deploy_pack_dir
      end

      private

      # Walks from the working directory towards the root and returns
      # the first directory that looks like a deploy pack, or the root
      # itself (which is never tested) when none does.
      #
      # @return [String]
      def find_enclosing_deploy_pack_dir
        wd     = Dir.pwd
        parent = File.dirname(wd)
        # At the root a directory is its own parent.
        until wd == parent
          return wd if is_deploy_pack_dir?(wd)
          wd, parent = parent, File.dirname(parent)
        end
        wd
      end

      # Could `dir` be a deploy pack dir?  It must be an existing
      # directory holding both 'Gemfile' and 'config/environment.rb'.
      #
      # @param [String] dir
      # @return [true, false]
      def is_deploy_pack_dir?(dir)
        dir && !dir.empty? && File.directory?(dir) && File.exist?(File.join(dir, 'Gemfile')) && File.exist?(File.join(dir, 'config', 'environment.rb'))
      end
    end

  end
end
@@ -0,0 +1,42 @@
1
module Wukong
  class Runner

    # Helpers for detecting a request for help and printing the help
    # message.
    #
    # Runners which want to modify how help messages are generated
    # should override the `contextualize_help_message!` instance method.
    # NOTE(review): that method is not defined in this module -- confirm
    # where it lives.
    module HelpMessage

      # Was the `--help` option specified on the command line?
      #
      # The boot sequence for a Runner strips out the `--help` option
      # (see #strip_help_param!) so that individual Runner classes can
      # customize their help messages; this reports what was found.
      #
      # @return [true, false]
      def help_given?
        @help_given ? true : false
      end

      # Remove the `--help` option from ARGV, remembering whether or
      # not it was present for #help_given?.
      def strip_help_param!
        @help_given = ARGV.delete('--help')
      end

      # Print a help message by asking the settings to dump theirs.
      def dump_help
        settings.dump_help
      end

      # Print a help message and exit with status 1.
      #
      # @raise [SystemExit]
      def dump_help_and_exit!
        dump_help
        exit(1)
      end

    end
  end
end
@@ -0,0 +1,33 @@
1
module Wukong

  # Provides a runner for periodically triggering a dataflow or
  # processor.
  module Source
    include Plugin

    # Configures the given +settings+ object with all settings
    # specific to Wukong::Source for the given +program+.
    #
    # Settings are only defined for the `wu-source` program; any other
    # program leaves +settings+ untouched.
    #
    # @param [Configliere::Param] settings the settings to configure
    # @param [String] program the name of the currently executing program
    def self.configure(settings, program)
      return unless program == 'wu-source'

      settings.define :per_sec,    description: "Number of events produced per second", type: Float
      settings.define :period,     description: "Number of seconds between events (overrides --per_sec)", type: Float
      settings.define :batch_size, description: "Trigger a finalize across the dataflow each time this many records are processed", type: Integer
    end

    # Boots Wukong::Source using the given +settings+ at the given
    # +root+.  There is currently nothing to do at boot time.
    #
    # @param [Configliere::Param] settings the settings to use to boot
    # @param [String] root the root directory to boot in
    def self.boot(settings, root)
    end

  end
end
32
+
33
+ require_relative('source/source_runner')
@@ -0,0 +1,74 @@
1
module Wukong
  module Source

    # A driver which works just like the `Wukong::Local::StdioDriver`
    # except it ignores input from `STDIN` and instead generates its
    # own input records according to some periodic schedule.  Each
    # consecutive record produced will be an incrementing positive
    # integer (as a string), starting with '1'.
    class SourceDriver < Wukong::Local::StdioDriver

      include Logging

      # The index of the next record to produce.
      attr_accessor :index

      # The number of records after which a `Processor#finalize` will
      # be called.
      attr_accessor :batch_size

      # Sets the initial value of `index` to 1 and sets the batch size
      # (only if it's positive).
      def post_init
        super()
        self.index = 1
        configured = settings[:batch_size]
        size       = configured ? configured.to_i : 0
        self.batch_size = size if size > 0
      end

      # Starts periodically feeding the processor or dataflow given by
      # `label` using the given `settings`.
      #
      # The first event is created immediately; subsequent events are
      # created by a periodic EventMachine timer.  The timer period is
      # `settings[:period]` if given, else the inverse of a positive
      # `settings[:per_sec]`, else one second.
      #
      # @param [String, Symbol] label
      # @param [Configliere::Param, Hash] settings
      def self.start(label, settings={})
        driver = new(:foobar, label, settings) # i don't think the 1st argument matters here...
        driver.post_init

        driver.create_event
        EventMachine::PeriodicTimer.new(period_for(settings)) { driver.create_event }
      end

      # Works out the number of seconds between events.
      #
      # An explicit `:period` always wins.  A `:per_sec` rate is only
      # honored when it is a positive number: zero would divide to an
      # infinite period and a negative rate to a negative one, so
      # those (and non-numeric values) fall back to one second.
      #
      # @param [Configliere::Param, Hash] settings
      # @return [Numeric] seconds between events
      def self.period_for(settings)
        return settings[:period] if settings[:period]

        per_sec = settings[:per_sec]
        return 1.0 unless per_sec.is_a?(Numeric) && per_sec > 0
        1.0 / per_sec
      end
      private_class_method :period_for

      # Creates a new event using the following steps:
      #
      # 1. Feeds a record with the existing `index` to the dataflow.
      # 2. Increments the `index`.
      # 3. Finalizes the dataflow if the number of records produced so
      #    far is a multiple of the `batch_size`.
      #
      # @see DriverMethods
      def create_event
        produced = index
        receive_line(produced.to_s)
        self.index = produced + 1
        # Records are numbered from 1, so the index of the record just
        # fed equals the count of records produced so far.  Testing the
        # already-incremented index would finalize one record early.
        finalize_dataflow if batch_size && (produced % batch_size) == 0
      end

      # Outputs a `record` from the dataflow or processor to `STDOUT`.
      #
      # `STDOUT` will automatically be flushed to force output to
      # prevent the feeling of "no output" when the looping period is
      # long.
      #
      # @param [Object] record the record yielded by the processor or the terminal node(s) of the dataflow
      def process(record)
        $stdout.puts record
        $stdout.flush
      end

    end
  end
end