wukong 3.0.0.pre → 3.0.0.pre2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +46 -33
- data/.gitmodules +3 -0
- data/.rspec +1 -1
- data/.travis.yml +8 -1
- data/.yardopts +0 -13
- data/Guardfile +4 -6
- data/{LICENSE.textile → LICENSE.md} +43 -55
- data/README-old.md +422 -0
- data/README.md +279 -418
- data/Rakefile +21 -5
- data/TODO.md +6 -6
- data/bin/wu-clean-encoding +31 -0
- data/bin/wu-lign +2 -2
- data/bin/wu-local +69 -0
- data/bin/wu-server +70 -0
- data/examples/Gemfile +38 -0
- data/examples/README.md +9 -0
- data/examples/dataflow/apache_log_line.rb +64 -25
- data/examples/dataflow/fibonacci_series.rb +101 -0
- data/examples/dataflow/parse_apache_logs.rb +37 -7
- data/examples/{dataflow.rb → dataflow/scraper_macro_flow.rb} +0 -0
- data/examples/dataflow/simple.rb +4 -4
- data/examples/geo.rb +4 -0
- data/examples/geo/geo_grids.numbers +0 -0
- data/examples/geo/geolocated.rb +331 -0
- data/examples/geo/quadtile.rb +69 -0
- data/examples/geo/spec/geolocated_spec.rb +247 -0
- data/examples/geo/tile_fetcher.rb +77 -0
- data/examples/graph/minimum_spanning_tree.rb +61 -61
- data/examples/jabberwocky.txt +36 -0
- data/examples/models/wikipedia.rb +20 -0
- data/examples/munging/Gemfile +8 -0
- data/examples/munging/airline_flights/airline.rb +57 -0
- data/examples/munging/airline_flights/airline_flights.rake +83 -0
- data/{lib/wukong/settings.rb → examples/munging/airline_flights/airplane.rb} +0 -0
- data/examples/munging/airline_flights/airport.rb +211 -0
- data/examples/munging/airline_flights/airport_id_unification.rb +129 -0
- data/examples/munging/airline_flights/airport_ok_chars.rb +4 -0
- data/examples/munging/airline_flights/flight.rb +156 -0
- data/examples/munging/airline_flights/models.rb +4 -0
- data/examples/munging/airline_flights/parse.rb +26 -0
- data/examples/munging/airline_flights/reconcile_airports.rb +142 -0
- data/examples/munging/airline_flights/route.rb +35 -0
- data/examples/munging/airline_flights/tasks.rake +83 -0
- data/examples/munging/airline_flights/timezone_fixup.rb +62 -0
- data/examples/munging/airline_flights/topcities.rb +167 -0
- data/examples/munging/airports/40_wbans.txt +40 -0
- data/examples/munging/airports/filter_weather_reports.rb +37 -0
- data/examples/munging/airports/join.pig +31 -0
- data/examples/munging/airports/to_tsv.rb +33 -0
- data/examples/munging/airports/usa_wbans.pig +19 -0
- data/examples/munging/airports/usa_wbans.txt +2157 -0
- data/examples/munging/airports/wbans.pig +19 -0
- data/examples/munging/airports/wbans.txt +2310 -0
- data/examples/munging/geo/geo_json.rb +54 -0
- data/examples/munging/geo/geo_models.rb +69 -0
- data/examples/munging/geo/geonames_models.rb +78 -0
- data/examples/munging/geo/iso_codes.rb +172 -0
- data/examples/munging/geo/reconcile_countries.rb +124 -0
- data/examples/munging/geo/tasks.rake +71 -0
- data/examples/munging/rake_helper.rb +62 -0
- data/examples/munging/weather/.gitignore +1 -0
- data/examples/munging/weather/Gemfile +4 -0
- data/examples/munging/weather/Rakefile +28 -0
- data/examples/munging/weather/extract_ish.rb +13 -0
- data/examples/munging/weather/models/weather.rb +119 -0
- data/examples/munging/weather/utils/noaa_downloader.rb +46 -0
- data/examples/munging/wikipedia/README.md +34 -0
- data/examples/munging/wikipedia/Rakefile +193 -0
- data/examples/munging/wikipedia/articles/extract_articles-parsed.rb +79 -0
- data/examples/munging/wikipedia/articles/extract_articles-templated.rb +136 -0
- data/examples/munging/wikipedia/articles/textualize_articles.rb +54 -0
- data/examples/munging/wikipedia/articles/verify_structure.rb +43 -0
- data/examples/munging/wikipedia/articles/wp2txt-LICENSE.txt +22 -0
- data/examples/munging/wikipedia/articles/wp2txt_article.rb +259 -0
- data/examples/munging/wikipedia/articles/wp2txt_utils.rb +452 -0
- data/examples/munging/wikipedia/dbpedia/dbpedia_common.rb +4 -0
- data/examples/munging/wikipedia/dbpedia/dbpedia_extract_geocoordinates.rb +78 -0
- data/examples/munging/wikipedia/dbpedia/extract_links.rb +193 -0
- data/examples/munging/wikipedia/dbpedia/sameas_extractor.rb +20 -0
- data/examples/munging/wikipedia/n1_subuniverse/n1_nodes.pig +18 -0
- data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb +21 -0
- data/examples/munging/wikipedia/page_metadata/extract_page_metadata.rb.old +27 -0
- data/examples/munging/wikipedia/pagelinks/augment_pagelinks.pig +29 -0
- data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb +14 -0
- data/examples/munging/wikipedia/pagelinks/extract_pagelinks.rb.old +25 -0
- data/examples/munging/wikipedia/pagelinks/undirect_pagelinks.pig +29 -0
- data/examples/munging/wikipedia/pageviews/augment_pageviews.pig +32 -0
- data/examples/munging/wikipedia/pageviews/extract_pageviews.rb +85 -0
- data/examples/munging/wikipedia/pig_style_guide.md +25 -0
- data/examples/munging/wikipedia/redirects/redirects_page_metadata.pig +19 -0
- data/examples/munging/wikipedia/subuniverse/sub_articles.pig +23 -0
- data/examples/munging/wikipedia/subuniverse/sub_page_metadata.pig +24 -0
- data/examples/munging/wikipedia/subuniverse/sub_pagelinks_from.pig +22 -0
- data/examples/munging/wikipedia/subuniverse/sub_pagelinks_into.pig +22 -0
- data/examples/munging/wikipedia/subuniverse/sub_pagelinks_within.pig +26 -0
- data/examples/munging/wikipedia/subuniverse/sub_pageviews.pig +29 -0
- data/examples/munging/wikipedia/subuniverse/sub_undirected_pagelinks_within.pig +24 -0
- data/examples/munging/wikipedia/utils/get_namespaces.rb +86 -0
- data/examples/munging/wikipedia/utils/munging_utils.rb +68 -0
- data/examples/munging/wikipedia/utils/namespaces.json +1 -0
- data/examples/rake_helper.rb +85 -0
- data/examples/server_logs/geo_ip_mapping/munge_geolite.rb +82 -0
- data/examples/server_logs/logline.rb +95 -0
- data/examples/server_logs/models.rb +66 -0
- data/examples/server_logs/page_counts.pig +48 -0
- data/examples/server_logs/server_logs-01-parse-script.rb +13 -0
- data/examples/server_logs/server_logs-02-histograms-full.rb +33 -0
- data/examples/server_logs/server_logs-02-histograms-mapper.rb +14 -0
- data/{old/examples/server_logs/breadcrumbs.rb → examples/server_logs/server_logs-03-breadcrumbs-full.rb} +26 -30
- data/examples/server_logs/server_logs-04-page_page_edges-full.rb +40 -0
- data/examples/string_reverser.rb +26 -0
- data/examples/text/pig_latin.rb +2 -2
- data/examples/text/regional_flavor/README.md +14 -0
- data/examples/text/regional_flavor/article_wordbags.pig +39 -0
- data/examples/text/regional_flavor/j01-article_wordbags.rb +4 -0
- data/examples/text/regional_flavor/simple_pig_script.pig +27 -0
- data/examples/word_count/accumulator.rb +26 -0
- data/examples/word_count/tokenizer.rb +13 -0
- data/examples/word_count/word_count.rb +6 -0
- data/examples/workflow/cherry_pie.dot +97 -0
- data/examples/workflow/cherry_pie.png +0 -0
- data/examples/workflow/cherry_pie.rb +61 -26
- data/lib/hanuman.rb +34 -7
- data/lib/hanuman/graph.rb +55 -31
- data/lib/hanuman/graphvizzer.rb +199 -178
- data/lib/hanuman/graphvizzer/gv_models.rb +161 -0
- data/lib/hanuman/graphvizzer/gv_presenter.rb +97 -0
- data/lib/hanuman/link.rb +35 -0
- data/lib/hanuman/registry.rb +46 -0
- data/lib/hanuman/stage.rb +76 -32
- data/lib/wukong.rb +23 -24
- data/lib/wukong/boot.rb +87 -0
- data/lib/wukong/configuration.rb +8 -0
- data/lib/wukong/dataflow.rb +45 -78
- data/lib/wukong/driver.rb +99 -0
- data/lib/wukong/emitter.rb +22 -0
- data/lib/wukong/model/faker.rb +24 -24
- data/lib/wukong/model/flatpack_parser/flat.rb +60 -0
- data/lib/wukong/model/flatpack_parser/flatpack.rb +4 -0
- data/lib/wukong/model/flatpack_parser/lang.rb +46 -0
- data/lib/wukong/model/flatpack_parser/parser.rb +55 -0
- data/lib/wukong/model/flatpack_parser/tokens.rb +130 -0
- data/lib/wukong/processor.rb +60 -114
- data/lib/wukong/spec_helpers.rb +81 -0
- data/lib/wukong/spec_helpers/integration_driver.rb +144 -0
- data/lib/wukong/spec_helpers/integration_driver_matchers.rb +219 -0
- data/lib/wukong/spec_helpers/processor_helpers.rb +95 -0
- data/lib/wukong/spec_helpers/processor_methods.rb +108 -0
- data/lib/wukong/spec_helpers/shared_examples.rb +15 -0
- data/lib/wukong/spec_helpers/spec_driver.rb +28 -0
- data/lib/wukong/spec_helpers/spec_driver_matchers.rb +195 -0
- data/lib/wukong/version.rb +2 -1
- data/lib/wukong/widget/filters.rb +311 -0
- data/lib/wukong/widget/processors.rb +156 -0
- data/lib/wukong/widget/reducers.rb +7 -0
- data/lib/wukong/widget/reducers/accumulator.rb +73 -0
- data/lib/wukong/widget/reducers/bin.rb +318 -0
- data/lib/wukong/widget/reducers/count.rb +61 -0
- data/lib/wukong/widget/reducers/group.rb +85 -0
- data/lib/wukong/widget/reducers/group_concat.rb +70 -0
- data/lib/wukong/widget/reducers/moments.rb +72 -0
- data/lib/wukong/widget/reducers/sort.rb +130 -0
- data/lib/wukong/widget/serializers.rb +287 -0
- data/lib/wukong/widget/sink.rb +10 -52
- data/lib/wukong/widget/source.rb +7 -113
- data/lib/wukong/widget/utils.rb +46 -0
- data/lib/wukong/widgets.rb +6 -0
- data/spec/examples/dataflow/fibonacci_series_spec.rb +18 -0
- data/spec/examples/dataflow/parsing_spec.rb +12 -11
- data/spec/examples/dataflow/simple_spec.rb +32 -6
- data/spec/examples/dataflow/telegram_spec.rb +36 -36
- data/spec/examples/graph/minimum_spanning_tree_spec.rb +30 -31
- data/spec/examples/munging/airline_flights/identifiers_spec.rb +16 -0
- data/spec/examples/munging/airline_flights_spec.rb +202 -0
- data/spec/examples/text/pig_latin_spec.rb +13 -16
- data/spec/examples/workflow/cherry_pie_spec.rb +34 -4
- data/spec/hanuman/graph_spec.rb +27 -2
- data/spec/hanuman/hanuman_spec.rb +10 -0
- data/spec/hanuman/registry_spec.rb +123 -0
- data/spec/hanuman/stage_spec.rb +61 -7
- data/spec/spec_helper.rb +29 -19
- data/spec/support/hanuman_test_helpers.rb +14 -12
- data/spec/support/shared_context_for_reducers.rb +37 -0
- data/spec/support/shared_examples_for_builders.rb +101 -0
- data/spec/support/shared_examples_for_shortcuts.rb +57 -0
- data/spec/support/wukong_test_helpers.rb +37 -11
- data/spec/wukong/dataflow_spec.rb +77 -55
- data/spec/wukong/local_runner_spec.rb +24 -24
- data/spec/wukong/model/faker_spec.rb +132 -131
- data/spec/wukong/runner_spec.rb +8 -8
- data/spec/wukong/widget/filters_spec.rb +61 -0
- data/spec/wukong/widget/processors_spec.rb +126 -0
- data/spec/wukong/widget/reducers/bin_spec.rb +92 -0
- data/spec/wukong/widget/reducers/count_spec.rb +11 -0
- data/spec/wukong/widget/reducers/group_spec.rb +20 -0
- data/spec/wukong/widget/reducers/moments_spec.rb +36 -0
- data/spec/wukong/widget/reducers/sort_spec.rb +26 -0
- data/spec/wukong/widget/serializers_spec.rb +92 -0
- data/spec/wukong/widget/sink_spec.rb +15 -15
- data/spec/wukong/widget/source_spec.rb +65 -41
- data/spec/wukong/wukong_spec.rb +10 -0
- data/wukong.gemspec +17 -10
- metadata +359 -335
- data/.document +0 -5
- data/VERSION +0 -1
- data/bin/hdp-bin +0 -44
- data/bin/hdp-bzip +0 -23
- data/bin/hdp-cat +0 -3
- data/bin/hdp-catd +0 -3
- data/bin/hdp-cp +0 -3
- data/bin/hdp-du +0 -86
- data/bin/hdp-get +0 -3
- data/bin/hdp-kill +0 -3
- data/bin/hdp-kill-task +0 -3
- data/bin/hdp-ls +0 -11
- data/bin/hdp-mkdir +0 -2
- data/bin/hdp-mkdirp +0 -12
- data/bin/hdp-mv +0 -3
- data/bin/hdp-parts_to_keys.rb +0 -77
- data/bin/hdp-ps +0 -3
- data/bin/hdp-put +0 -3
- data/bin/hdp-rm +0 -32
- data/bin/hdp-sort +0 -40
- data/bin/hdp-stream +0 -40
- data/bin/hdp-stream-flat +0 -22
- data/bin/hdp-stream2 +0 -39
- data/bin/hdp-sync +0 -17
- data/bin/hdp-wc +0 -67
- data/bin/wu-flow +0 -10
- data/bin/wu-map +0 -17
- data/bin/wu-red +0 -17
- data/bin/wukong +0 -17
- data/data/CREDITS.md +0 -355
- data/data/graph/airfares.tsv +0 -2174
- data/data/text/gift_of_the_magi.txt +0 -225
- data/data/text/jabberwocky.txt +0 -36
- data/data/text/rectification_of_names.txt +0 -33
- data/data/twitter/a_atsigns_b.tsv +0 -64
- data/data/twitter/a_follows_b.tsv +0 -53
- data/data/twitter/tweet.tsv +0 -167
- data/data/twitter/twitter_user.tsv +0 -55
- data/data/wikipedia/dbpedia-sentences.tsv +0 -1000
- data/docpages/INSTALL.textile +0 -92
- data/docpages/LICENSE.textile +0 -107
- data/docpages/README-elastic_map_reduce.textile +0 -377
- data/docpages/README-performance.textile +0 -90
- data/docpages/README-wulign.textile +0 -65
- data/docpages/UsingWukong-part1-get_ready.textile +0 -17
- data/docpages/UsingWukong-part2-ThinkingBigData.textile +0 -75
- data/docpages/UsingWukong-part3-parsing.textile +0 -138
- data/docpages/_config.yml +0 -39
- data/docpages/avro/avro_notes.textile +0 -56
- data/docpages/avro/performance.textile +0 -36
- data/docpages/avro/tethering.textile +0 -19
- data/docpages/bigdata-tips.textile +0 -143
- data/docpages/code/api_response_example.txt +0 -20
- data/docpages/code/parser_skeleton.rb +0 -38
- data/docpages/diagrams/MapReduceDiagram.graffle +0 -0
- data/docpages/favicon.ico +0 -0
- data/docpages/gem.css +0 -16
- data/docpages/hadoop-tips.textile +0 -83
- data/docpages/index.textile +0 -92
- data/docpages/intro.textile +0 -8
- data/docpages/moreinfo.textile +0 -174
- data/docpages/news.html +0 -24
- data/docpages/pig/PigLatinExpressionsList.txt +0 -122
- data/docpages/pig/PigLatinReferenceManual.txt +0 -1640
- data/docpages/pig/commandline_params.txt +0 -26
- data/docpages/pig/cookbook.html +0 -481
- data/docpages/pig/images/hadoop-logo.jpg +0 -0
- data/docpages/pig/images/instruction_arrow.png +0 -0
- data/docpages/pig/images/pig-logo.gif +0 -0
- data/docpages/pig/piglatin_ref1.html +0 -1103
- data/docpages/pig/piglatin_ref2.html +0 -14340
- data/docpages/pig/setup.html +0 -505
- data/docpages/pig/skin/basic.css +0 -166
- data/docpages/pig/skin/breadcrumbs.js +0 -237
- data/docpages/pig/skin/fontsize.js +0 -166
- data/docpages/pig/skin/getBlank.js +0 -40
- data/docpages/pig/skin/getMenu.js +0 -45
- data/docpages/pig/skin/images/chapter.gif +0 -0
- data/docpages/pig/skin/images/chapter_open.gif +0 -0
- data/docpages/pig/skin/images/current.gif +0 -0
- data/docpages/pig/skin/images/external-link.gif +0 -0
- data/docpages/pig/skin/images/header_white_line.gif +0 -0
- data/docpages/pig/skin/images/page.gif +0 -0
- data/docpages/pig/skin/images/pdfdoc.gif +0 -0
- data/docpages/pig/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
- data/docpages/pig/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
- data/docpages/pig/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/docpages/pig/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
- data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/docpages/pig/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/docpages/pig/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
- data/docpages/pig/skin/print.css +0 -54
- data/docpages/pig/skin/profile.css +0 -181
- data/docpages/pig/skin/screen.css +0 -587
- data/docpages/pig/tutorial.html +0 -1059
- data/docpages/pig/udf.html +0 -1509
- data/docpages/tutorial.textile +0 -283
- data/docpages/usage.textile +0 -195
- data/docpages/wutils.textile +0 -263
- data/examples/dataflow/complex.rb +0 -11
- data/examples/dataflow/donuts.rb +0 -13
- data/examples/tiny_count/jabberwocky_output.tsv +0 -92
- data/examples/word_count.rb +0 -48
- data/examples/workflow/fiddle.rb +0 -24
- data/lib/away/escapement.rb +0 -129
- data/lib/away/exe.rb +0 -11
- data/lib/away/experimental.rb +0 -5
- data/lib/away/from_file.rb +0 -52
- data/lib/away/job.rb +0 -56
- data/lib/away/job/rake_compat.rb +0 -17
- data/lib/away/registry.rb +0 -79
- data/lib/away/runner.rb +0 -276
- data/lib/away/runner/execute.rb +0 -121
- data/lib/away/script.rb +0 -161
- data/lib/away/script/hadoop_command.rb +0 -240
- data/lib/away/source/file_list_source.rb +0 -15
- data/lib/away/source/looper.rb +0 -18
- data/lib/away/task.rb +0 -219
- data/lib/hanuman/action.rb +0 -21
- data/lib/hanuman/chain.rb +0 -4
- data/lib/hanuman/graphviz.rb +0 -74
- data/lib/hanuman/resource.rb +0 -6
- data/lib/hanuman/slot.rb +0 -87
- data/lib/hanuman/slottable.rb +0 -220
- data/lib/wukong/bad_record.rb +0 -15
- data/lib/wukong/event.rb +0 -44
- data/lib/wukong/local_runner.rb +0 -55
- data/lib/wukong/mapred.rb +0 -3
- data/lib/wukong/universe.rb +0 -48
- data/lib/wukong/widget/filter.rb +0 -81
- data/lib/wukong/widget/gibberish.rb +0 -123
- data/lib/wukong/widget/monitor.rb +0 -26
- data/lib/wukong/widget/reducer.rb +0 -66
- data/lib/wukong/widget/stringifier.rb +0 -50
- data/lib/wukong/workflow.rb +0 -22
- data/lib/wukong/workflow/command.rb +0 -42
- data/old/config/emr-example.yaml +0 -48
- data/old/examples/README.txt +0 -17
- data/old/examples/contrib/jeans/README.markdown +0 -165
- data/old/examples/contrib/jeans/data/normalized_sizes +0 -3
- data/old/examples/contrib/jeans/data/orders.tsv +0 -1302
- data/old/examples/contrib/jeans/data/sizes +0 -3
- data/old/examples/contrib/jeans/normalize.rb +0 -20
- data/old/examples/contrib/jeans/sizes.rb +0 -55
- data/old/examples/corpus/bnc_word_freq.rb +0 -44
- data/old/examples/corpus/bucket_counter.rb +0 -47
- data/old/examples/corpus/dbpedia_abstract_to_sentences.rb +0 -86
- data/old/examples/corpus/sentence_bigrams.rb +0 -53
- data/old/examples/corpus/sentence_coocurrence.rb +0 -66
- data/old/examples/corpus/stopwords.rb +0 -138
- data/old/examples/corpus/words_to_bigrams.rb +0 -53
- data/old/examples/emr/README.textile +0 -110
- data/old/examples/emr/dot_wukong_dir/credentials.json +0 -7
- data/old/examples/emr/dot_wukong_dir/emr.yaml +0 -69
- data/old/examples/emr/dot_wukong_dir/emr_bootstrap.sh +0 -33
- data/old/examples/emr/elastic_mapreduce_example.rb +0 -28
- data/old/examples/network_graph/adjacency_list.rb +0 -74
- data/old/examples/network_graph/breadth_first_search.rb +0 -72
- data/old/examples/network_graph/gen_2paths.rb +0 -68
- data/old/examples/network_graph/gen_multi_edge.rb +0 -112
- data/old/examples/network_graph/gen_symmetric_links.rb +0 -64
- data/old/examples/pagerank/README.textile +0 -6
- data/old/examples/pagerank/gen_initial_pagerank_graph.pig +0 -57
- data/old/examples/pagerank/pagerank.rb +0 -72
- data/old/examples/pagerank/pagerank_initialize.rb +0 -42
- data/old/examples/pagerank/run_pagerank.sh +0 -21
- data/old/examples/sample_records.rb +0 -33
- data/old/examples/server_logs/apache_log_parser.rb +0 -15
- data/old/examples/server_logs/nook.rb +0 -48
- data/old/examples/server_logs/nook/faraday_dummy_adapter.rb +0 -94
- data/old/examples/server_logs/user_agent.rb +0 -40
- data/old/examples/simple_word_count.rb +0 -82
- data/old/examples/size.rb +0 -61
- data/old/examples/stats/avg_value_frequency.rb +0 -86
- data/old/examples/stats/binning_percentile_estimator.rb +0 -140
- data/old/examples/stats/data/avg_value_frequency.tsv +0 -3
- data/old/examples/stats/rank_and_bin.rb +0 -173
- data/old/examples/stupidly_simple_filter.rb +0 -40
- data/old/examples/word_count.rb +0 -75
- data/old/graph/graphviz_builder.rb +0 -580
- data/old/graph_easy/Attributes.pm +0 -4181
- data/old/graph_easy/Graphviz.pm +0 -2232
- data/old/wukong.rb +0 -18
- data/old/wukong/and_pig.rb +0 -38
- data/old/wukong/bad_record.rb +0 -18
- data/old/wukong/datatypes.rb +0 -24
- data/old/wukong/datatypes/enum.rb +0 -127
- data/old/wukong/datatypes/fake_types.rb +0 -17
- data/old/wukong/decorator.rb +0 -28
- data/old/wukong/encoding/asciize.rb +0 -108
- data/old/wukong/extensions.rb +0 -16
- data/old/wukong/extensions/array.rb +0 -18
- data/old/wukong/extensions/blank.rb +0 -93
- data/old/wukong/extensions/class.rb +0 -189
- data/old/wukong/extensions/date_time.rb +0 -53
- data/old/wukong/extensions/emittable.rb +0 -69
- data/old/wukong/extensions/enumerable.rb +0 -79
- data/old/wukong/extensions/hash.rb +0 -167
- data/old/wukong/extensions/hash_keys.rb +0 -16
- data/old/wukong/extensions/hash_like.rb +0 -150
- data/old/wukong/extensions/hashlike_class.rb +0 -47
- data/old/wukong/extensions/module.rb +0 -2
- data/old/wukong/extensions/pathname.rb +0 -27
- data/old/wukong/extensions/string.rb +0 -65
- data/old/wukong/extensions/struct.rb +0 -17
- data/old/wukong/extensions/symbol.rb +0 -11
- data/old/wukong/filename_pattern.rb +0 -74
- data/old/wukong/helper.rb +0 -7
- data/old/wukong/helper/stopwords.rb +0 -195
- data/old/wukong/helper/tokenize.rb +0 -35
- data/old/wukong/logger.rb +0 -38
- data/old/wukong/periodic_monitor.rb +0 -72
- data/old/wukong/schema.rb +0 -269
- data/old/wukong/script.rb +0 -286
- data/old/wukong/script/avro_command.rb +0 -5
- data/old/wukong/script/cassandra_loader_script.rb +0 -40
- data/old/wukong/script/emr_command.rb +0 -168
- data/old/wukong/script/hadoop_command.rb +0 -237
- data/old/wukong/script/local_command.rb +0 -41
- data/old/wukong/store.rb +0 -10
- data/old/wukong/store/base.rb +0 -27
- data/old/wukong/store/cassandra.rb +0 -10
- data/old/wukong/store/cassandra/streaming.rb +0 -75
- data/old/wukong/store/cassandra/struct_loader.rb +0 -21
- data/old/wukong/store/cassandra_model.rb +0 -91
- data/old/wukong/store/chh_chunked_flat_file_store.rb +0 -37
- data/old/wukong/store/chunked_flat_file_store.rb +0 -48
- data/old/wukong/store/conditional_store.rb +0 -57
- data/old/wukong/store/factory.rb +0 -8
- data/old/wukong/store/flat_file_store.rb +0 -89
- data/old/wukong/store/key_store.rb +0 -51
- data/old/wukong/store/null_store.rb +0 -15
- data/old/wukong/store/read_thru_store.rb +0 -22
- data/old/wukong/store/tokyo_tdb_key_store.rb +0 -33
- data/old/wukong/store/tyrant_rdb_key_store.rb +0 -57
- data/old/wukong/store/tyrant_tdb_key_store.rb +0 -20
- data/old/wukong/streamer.rb +0 -30
- data/old/wukong/streamer/accumulating_reducer.rb +0 -83
- data/old/wukong/streamer/base.rb +0 -126
- data/old/wukong/streamer/counting_reducer.rb +0 -25
- data/old/wukong/streamer/filter.rb +0 -20
- data/old/wukong/streamer/instance_streamer.rb +0 -15
- data/old/wukong/streamer/json_streamer.rb +0 -21
- data/old/wukong/streamer/line_streamer.rb +0 -12
- data/old/wukong/streamer/list_reducer.rb +0 -31
- data/old/wukong/streamer/rank_and_bin_reducer.rb +0 -145
- data/old/wukong/streamer/record_streamer.rb +0 -14
- data/old/wukong/streamer/reducer.rb +0 -11
- data/old/wukong/streamer/set_reducer.rb +0 -14
- data/old/wukong/streamer/struct_streamer.rb +0 -48
- data/old/wukong/streamer/summing_reducer.rb +0 -29
- data/old/wukong/streamer/uniq_by_last_reducer.rb +0 -51
- data/old/wukong/typed_struct.rb +0 -12
- data/spec/away/encoding_spec.rb +0 -32
- data/spec/away/exe_spec.rb +0 -20
- data/spec/away/flow_spec.rb +0 -82
- data/spec/away/graph_spec.rb +0 -6
- data/spec/away/job_spec.rb +0 -15
- data/spec/away/rake_compat_spec.rb +0 -9
- data/spec/away/script_spec.rb +0 -81
- data/spec/hanuman/graphviz_spec.rb +0 -29
- data/spec/hanuman/slot_spec.rb +0 -2
- data/spec/support/examples_helper.rb +0 -10
- data/spec/support/streamer_test_helpers.rb +0 -6
- data/spec/support/wukong_widget_helpers.rb +0 -66
- data/spec/wukong/processor_spec.rb +0 -109
- data/spec/wukong/widget/filter_spec.rb +0 -99
- data/spec/wukong/widget/stringifier_spec.rb +0 -51
- data/spec/wukong/workflow/command_spec.rb +0 -5
data/Rakefile
CHANGED
@@ -1,12 +1,28 @@
|
|
1
|
-
require 'bundler'
|
1
|
+
require 'bundler'
|
2
|
+
Bundler::GemHelper.install_tasks
|
2
3
|
|
3
4
|
require 'rspec/core/rake_task'
|
5
|
+
RSpec::Core::RakeTask.new(:specs)
|
6
|
+
|
4
7
|
require 'yard'
|
8
|
+
YARD::Rake::YardocTask.new
|
5
9
|
|
6
|
-
RSpec
|
7
|
-
|
10
|
+
desc 'Run RSpec with code coverage'
|
11
|
+
task :cov do
|
12
|
+
ENV['WUKONG_COV'] = 'true' # ENV only accepts String values; assigning the boolean `true` raises TypeError
|
13
|
+
Rake::Task[:specs].execute
|
8
14
|
end
|
9
15
|
|
10
|
-
YARD::Rake::YardocTask.new
|
11
|
-
|
12
16
|
task :default => :specs
|
17
|
+
|
18
|
+
desc "Create a TAGS file for this project"
|
19
|
+
task :tags do
|
20
|
+
files = [%w[Gemfile Guardfile Rakefile README.md].map { |b| File.join(File.dirname(__FILE__), b) }]
|
21
|
+
%w[bin examples lib spec].each do |dir|
|
22
|
+
files << Dir[File.join(File.dirname(__FILE__), "#{dir}/**/*.rb")]
|
23
|
+
end
|
24
|
+
files.each do |arry|
|
25
|
+
sh "etags", '-a', *arry unless arry.empty?
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
data/TODO.md
CHANGED
@@ -4,21 +4,21 @@
|
|
4
4
|
- hooks up source to flow,
|
5
5
|
- if iterated source, drives it, otherwise sits in the flow
|
6
6
|
|
7
|
-
* these set the contract for the inbound
|
7
|
+
* these set the contract for the inbound products
|
8
8
|
|
9
9
|
### slots
|
10
10
|
|
11
11
|
Typical case: one input, `:input`, one output `:output`
|
12
12
|
|
13
|
-
* there are as many
|
13
|
+
* there are as many products as
|
14
14
|
- the total number of action stage outputs
|
15
|
-
- the concrete input
|
15
|
+
- the concrete input products
|
16
16
|
* The number of rsrc->action edges is at most the total number of input slots
|
17
|
-
- (you cannot wire multiple
|
17
|
+
- (you cannot wire multiple products to the same input slot)
|
18
18
|
|
19
19
|
|
20
20
|
1. action stage B wires up to an action stage A (which really means "the full set of A's outputs")
|
21
|
-
2. I wire action A's output as production
|
21
|
+
2. I wire action A's output as production product X
|
22
22
|
3.
|
23
23
|
|
24
24
|
4. How do I address other stages?
|
@@ -71,7 +71,7 @@ __________________________________________________________________________
|
|
71
71
|
| foo |
|
72
72
|
----------
|
73
73
|
|
74
|
-
create a
|
74
|
+
create a product with no action? action with anonymous product, wired up later?
|
75
75
|
|
76
76
|
|
77
77
|
* connections:
|
data/bin/wu-clean-encoding
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding:UTF-8
|
3
|
+
|
4
|
+
if ARGV.include?('--help')
|
5
|
+
puts <<USAGE
|
6
|
+
wu-clean-encoding cleans malformed characters from stdin.
|
7
|
+
|
8
|
+
If a character is malformed, as defined by valid_encoding?,
|
9
|
+
it is replaced with a '�'.
|
10
|
+
|
11
|
+
wu-clean-encoding was built to work with UTF-8, and no
|
12
|
+
guarantees are provided for other encodings.
|
13
|
+
USAGE
|
14
|
+
exit(0)
|
15
|
+
end
|
16
|
+
|
17
|
+
ARGF.each do |line|
|
18
|
+
if line.valid_encoding?
|
19
|
+
$stdout.write line
|
20
|
+
else
|
21
|
+
repaired_line = []
|
22
|
+
line.each_char do |char|
|
23
|
+
if char.valid_encoding?
|
24
|
+
repaired_line << char
|
25
|
+
else
|
26
|
+
repaired_line << "�"
|
27
|
+
end
|
28
|
+
end
|
29
|
+
$stdout.write repaired_line.join
|
30
|
+
end
|
31
|
+
end
|
data/bin/wu-lign
CHANGED
@@ -161,8 +161,8 @@ format = maxw.zip(col_types, col_minmag, col_maxmag, ARGV).map do |width, type,
|
|
161
161
|
case type
|
162
162
|
when :mixed, nil then lambda{|s| "%-#{width}s" % s }
|
163
163
|
when :str then lambda{|s| "%-#{width}s" % s }
|
164
|
-
when :int then lambda{|s| "%#{width}d" % s.gsub(
|
165
|
-
when :float then lambda{|s| "%#{maxmag+minmag+2}.#{minmag}f" % s.to_f }
|
164
|
+
when :int then lambda{|s| "%#{width}d" % s.gsub(/[^\d\-\+]+/, "").to_i }
|
165
|
+
when :float then lambda{|s| "%#{maxmag+minmag+2}.#{minmag}f" % s.gsub(/[^\d\.eE\-\+]+/, "").to_f }
|
166
166
|
else raise "oops type #{type}" end
|
167
167
|
end
|
168
168
|
|
data/bin/wu-local
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'wukong'
|
4
|
+
|
5
|
+
settings = Wukong::Local::Configuration
|
6
|
+
settings.use(:commandline)
|
7
|
+
|
8
|
+
def settings.usage
|
9
|
+
"usage: #{File.basename($0)} PROCESSOR|FLOW [ --param=value | -p value | --param | -p]"
|
10
|
+
end
|
11
|
+
|
12
|
+
settings.description = <<-EOF
|
13
|
+
wu-local is a tool for running Wukong processors and flows locally on
|
14
|
+
the command-line. Use wu-local by passing it a processor and feeding
|
15
|
+
in some data:
|
16
|
+
|
17
|
+
$ echo 'UNIX is Clever and Fun...' | wu-local tokenizer.rb
|
18
|
+
UNIX
|
19
|
+
is
|
20
|
+
Clever
|
21
|
+
and
|
22
|
+
Fun
|
23
|
+
|
24
|
+
If your processors have named fields you can pass them in as
|
25
|
+
arguments:
|
26
|
+
|
27
|
+
$ echo 'UNIX is clever and fun...' | wu-local tokenizer.rb --min_length=4
|
28
|
+
UNIX
|
29
|
+
Clever
|
30
|
+
|
31
|
+
You can chain processors and calls to wu-local together:
|
32
|
+
|
33
|
+
$ echo 'UNIX is clever and fun...' | wu-local tokenizer.rb --min_length=4 | wu-local downcaser.rb
|
34
|
+
unix
|
35
|
+
clever
|
36
|
+
|
37
|
+
Which is a good way to develop a combined data flow which you can
|
38
|
+
again test locally:
|
39
|
+
|
40
|
+
$ echo 'UNIX is clever and fun...' | wu-local tokenize_and_downcase_big_words.rb
|
41
|
+
unix
|
42
|
+
clever
|
43
|
+
EOF
|
44
|
+
|
45
|
+
settings.define :run, description: "Name of the processor or dataflow to use. Defaults to basename of the given path.", flag: 'r'
|
46
|
+
|
47
|
+
require 'wukong/boot' ; Wukong.boot!(settings)
|
48
|
+
|
49
|
+
thing = settings.rest.first
|
50
|
+
case
|
51
|
+
when thing.nil?
|
52
|
+
settings.dump_help
|
53
|
+
exit(1)
|
54
|
+
when Wukong.registry.registered?(thing.to_sym)
|
55
|
+
processor = thing.to_sym
|
56
|
+
when File.exist?(thing)
|
57
|
+
load thing
|
58
|
+
processor = settings.run || File.basename(thing, '.rb')
|
59
|
+
else
|
60
|
+
settings.dump_help
|
61
|
+
exit(2)
|
62
|
+
end
|
63
|
+
# p settings
|
64
|
+
begin
|
65
|
+
Wukong::LocalDriver.run(processor.to_sym, settings)
|
66
|
+
rescue Wukong::Error => e
|
67
|
+
$stderr.puts e.message
|
68
|
+
exit(3)
|
69
|
+
end
|
data/bin/wu-server
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'configliere'
|
3
|
+
require 'wukong'
|
4
|
+
require 'eventmachine'
|
5
|
+
require 'em-synchrony'
|
6
|
+
require 'multi_json'
|
7
|
+
|
8
|
+
Settings({
|
9
|
+
host: "localhost",
|
10
|
+
port: 9500
|
11
|
+
})
|
12
|
+
|
13
|
+
Settings.use :commandline
|
14
|
+
Settings.resolve!
|
15
|
+
|
16
|
+
# Load the file on the command line
|
17
|
+
wu_file = ARGV.shift
|
18
|
+
load wu_file
|
19
|
+
$processor = File.basename(wu_file, '.rb').to_sym
|
20
|
+
|
21
|
+
class Wukong::Server
|
22
|
+
def prepare(options = {})
|
23
|
+
dataflow_class = Wukong.dataflow(:server){ send(options[:processor]) }
|
24
|
+
flow_builder = Wukong.registry.retrieve(:server)
|
25
|
+
flow = flow_builder.build(processor: $processor)
|
26
|
+
|
27
|
+
@buffer = []
|
28
|
+
@processor = flow.stages.values.first
|
29
|
+
@processor.emitter = ->(value){ @buffer << value }
|
30
|
+
end
|
31
|
+
|
32
|
+
def process(record)
|
33
|
+
@processor.process(record) # @processor is the stage assigned in #prepare; @process is never set and would be nil
|
34
|
+
end
|
35
|
+
|
36
|
+
def cleanup
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
|
41
|
+
# EventMachine server
|
42
|
+
|
43
|
+
class WukongMachine < EM::Protocols::LineAndTextProtocol
|
44
|
+
def post_init
|
45
|
+
puts "[server] Client connected"
|
46
|
+
@wukong = Wukong::Server.new # Wukong::Server is the class defined above in this script; WukongInterface is not defined here
|
47
|
+
@wukong.prepare(processor: $processor)
|
48
|
+
end
|
49
|
+
|
50
|
+
def receive_data(data)
|
51
|
+
@buffer = []
|
52
|
+
input = MultiJson.load data
|
53
|
+
|
54
|
+
op = proc { @wukong.process(input) }
|
55
|
+
callback = proc { send_data MultiJson.dump(@buffer) + "\n" }
|
56
|
+
EM.defer(op, callback)
|
57
|
+
|
58
|
+
rescue MultiJson::DecodeError => ex
|
59
|
+
STDERR.puts "[server] Dropped: Malformed request"
|
60
|
+
end
|
61
|
+
|
62
|
+
def unbind
|
63
|
+
puts "[server] Client disconnected."
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
EM::run {
|
68
|
+
EM::start_server(Settings[:host], Settings[:port], WukongMachine)
|
69
|
+
puts "Listening on #{Settings[:host]}:#{Settings[:port]}"
|
70
|
+
}
|
data/examples/Gemfile
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# The :rubygems symbol is a deprecated shorthand that Bundler warns about;
# name the HTTPS index explicitly instead.
source 'https://rubygems.org'

gem "configliere",        '~> 0.4'
gem "multi_json",         '>= 1.3.6'
gem "vayacondios-client", '>= 0.0.3'
gem "gorillib",           '>= 0.4.2'
gem "uuidtools"
gem "eventmachine"
gem "log4r"

# Gems used only by the example scripts.
group :examples do
  gem "forgery"
  gem "nokogiri"
  # gem "sanitize"
  gem "addressable"
  gem "crack"
  gem "oj"
  gem "activesupport"
end

group :development do
  gem "bundler",     '~> 1.1'
  gem "rake",        '>= 0.9'
  gem "rspec",       '>= 2.8'
  gem "guard",       '>= 1.0'
  gem "guard-rspec", '>= 0.6'
  gem "simplecov",   '>= 0.5'
  gem "pry"
  gem "ap"
end

group :docs do
  gem "yard"
  gem "redcarpet"
  gem "addressable"
  gem "htmlentities"
end
|
data/examples/README.md
ADDED
@@ -1,23 +1,30 @@
|
|
1
|
+
#
|
2
|
+
# Parses logs in either the [Apache Common Log Format](http://en.wikipedia.org/wiki/Common_Log_Format)
|
3
|
+
# or [Apache Combined Log Format](http://httpd.apache.org/docs/2.2/logs.html#combined)
|
4
|
+
#
|
5
|
+
# Common: `%h %l %u %t "%r" %>s %b`
|
6
|
+
# Combined: `%h %l %u %t "%r" %>s %b "%{Referer}i" "%{User-agent}i"`
|
7
|
+
#
|
1
8
|
class ApacheLogLine
  include Gorillib::Model

  # Fields are declared in the same order as the captures of COMMON_LOG_RE,
  # because .make passes the captures positionally to from_tuple.
  field :client,        Hostname
  field :rfc_1413,      String
  field :userid,        String
  field :log_timestamp, Time
  field :http_method,   String
  field :rsrc,          String
  field :protocol,      String
  field :response_code, Integer
  field :size,          Integer
  # field :referer,       String
  # field :user_agent,    String

  # Coarse classification of the requested resource by file extension.
  #
  # @return [Symbol] :asset, :image, :page or :other
  def page_type
    case rsrc
    when /\.(css|js)$/                 then :asset
    when /\.(png|gif|ico)$/            then :image
    when /\.(pl|s?html?|asp|jsp|cgi)$/ then :page
    else                                    :other
    end
  end

  #
  # Regular expression to parse an apache log line.
  #
  #   local - - [24/Oct/1994:13:43:13 -0600] "GET index.html HTTP/1.0" 200 3185
  #   83.240.154.3 - - [07/Jun/2008:20:37:11 +0000] "GET /faq HTTP/1.1" 200 569 "http://infochimps.org/search?query=CAC" "Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.9.0.16) Gecko/2009120208 Firefox/3.0.16"
  #   whidbey.whidbey.com - - [04/Sep/1995:00:30:18 -0400] "GET /pub/sshay/images/btthumb.jpg" 200 4624
  #   jgbustam-ppp.clark.net - - [04/Sep/1995:00:00:28 -0400] "GET /pub/jgbustam/famosos/alpha.html HTTP/1.0" 304 -
  #
  COMMON_LOG_RE = %r{\A
    (\S+)                           # client         83.240.154.3
    \s(\S+)                         # rfc_1413       -
    \s(\S+)                         # userid         -
    \s\[([\w\:\+\-\ \/]+)\]         # date part      [07/Jun/2008:20:37:11 +0000]
    \s\"(?:(\S+)                    # http_method    "GET
    \s(.+?)                         # rsrc           /faq
    (?:\s(HTTP/\d+\.\d+))?|-)\"     # protocol       HTTP/1.1"
    \s(\d+|-)                       # response_code  200
    \s(\d+|-)                       # size           569
    \z
  }x

  # Same as COMMON_LOG_RE plus the trailing quoted referer and user agent.
  # Defined for reference; .make does not use it.
  COMBINED_LOG_RE = %r{\A
    (\S+)                           # client         83.240.154.3
    \s(\S+)                         # rfc_1413       -
    \s(\S+)                         # userid         -
    \s\[([\w\:\+\-\ \/]+)\]         # date part      [07/Jun/2008:20:37:11 +0000]
    \s\"(?:(\S+)                    # http_method    "GET
    \s([^\"]+?)                     # rsrc           /faq
    (?:\s(HTTP/\d+\.\d+))?|-)\"     # protocol       HTTP/1.1"
    \s(\d+|-)                       # response_code  200
    \s(\d+|-)                       # size           569
    (?:\s\"([^\"]*)\")              # referer        "http://infochimps.org/search?query=CAC"
    (?:\s\"([^\"]*)\")              # ua             "Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.9.0.16) Gecko/2009120208 Firefox/3.0.16"
    \z
  }x

  # LOG_RE = Regexp.compile(%r{\A(\S+)\s})

  MONTHS = { 'Jan' => 1, 'Feb' => 2, 'Mar' => 3, 'Apr' => 4, 'May' => 5, 'Jun' => 6, 'Jul' => 7, 'Aug' => 8, 'Sep' => 9, 'Oct' => 10, 'Nov' => 11, 'Dec' => 12, }.freeze

  # Converts a time like `10/Apr/2007:10:58:27 +0300` into a Time.
  #
  # Input that cannot be turned into a valid Time -- no match, an unknown
  # month abbreviation, a zone that is not +HHMM/-HHMM, or out-of-range
  # numbers -- is reported with a warning and stored as nil.
  def receive_log_timestamp(raw_ts)
    return super(nil) if raw_ts.nil?
    time = parse_log_timestamp(raw_ts)
    warn "Can't parse date #{raw_ts}" if time.nil?
    super(time)
  end

  # Builds a Time from a common-log-format timestamp.
  #
  # @return [Time, nil] nil when the timestamp cannot be parsed
  def parse_log_timestamp(raw_ts)
    match = %r{(\d+)/(\w+)/(\d+):(\d+):(\d+):(\d+)\s([\+\-\w]+)}.match(raw_ts)
    return unless match
    day, month_name, year, hour, min, sec, tz = match.captures
    # A nil month would be silently read as January by Time.new.
    month = MONTHS[month_name]
    return unless month && tz =~ /\A[+-]\d{4}\z/
    # -0600 to -06:00, built as a new string so the capture is left untouched
    offset = "#{tz[0, 3]}:#{tz[3, 2]}"
    begin
      Time.new(year.to_i, month, day.to_i, hour.to_i, min.to_i, sec.to_i, offset)
    rescue ArgumentError
      nil # e.g. day 32 or hour 25
    end
  end
  private :parse_log_timestamp

  # @return [String, nil] the log_timestamp in the common log format, or nil
  #   when no timestamp is set
  def unparsed_log_timestamp
    return if log_timestamp.blank?
    log_timestamp.strftime("%d/%b/%Y:%H:%M:%S %z")
  end

  # Use the regex to break line into fields.
  #
  # Only COMMON_LOG_RE is tried, so a line that does not match it returns nil.
  # A line containing invalid UTF-8 also returns nil; any other ArgumentError
  # is re-raised.
  def self.make(line)
    m = COMMON_LOG_RE.match(line)
    return unless m
    from_tuple(*m.captures)
  rescue ArgumentError => err
    raise unless err.message =~ /invalid byte sequence in UTF-8/
  end
end
|
@@ -0,0 +1,101 @@
|
|
1
|
+
require 'wukong/widget/many_to_many'
|
2
|
+
require 'gorillib/enumerable/sum'
|
3
|
+
|
4
|
+
#
|
5
|
+
# An example dataflow -- generates the Fibonacci series by feeding each sum back in as an input.
|
6
|
+
#
|
7
|
+
|
8
|
+
# Holds records back in a FIFO queue: a record is emitted only once more than
# `delay` records are waiting, and whatever is left is emitted on stop.
Wukong.processor(:delay_buffer) do
  attr_accessor :queue
  field :delay, Integer, position: 0, doc: "number of records to hold in buffer"

  # starts from an empty queue
  def setup(*)
    super
    @queue = []
  end

  # enqueue the record, then release the oldest one if the buffer is full
  def process(rec)
    queue.push(rec)
    return unless ready?
    emit(next_item)
  end

  # removes and returns the oldest queued record
  def next_item
    queue.shift
  end

  # true once the queue holds more than `delay` records
  def ready?
    backlog = queue.size
    warn "Hmm, too many records in queue: #{queue}" if backlog > delay + 1
    backlog > delay
  end

  # flush every record still waiting in the queue
  def stop
    queue.each { |leftover| emit(leftover) }
    super
  end
end
|
39
|
+
|
40
|
+
# Queues incoming records per input channel and, once every input slot has at
# least one record waiting, emits an array holding the head of each queue.
class Wukong::Batcher < Wukong::Processor
  register_action
  include Hanuman::Slottable
  include Hanuman::OutputSlotted

  attr_accessor :queues
  consume :n_1,    Integer, doc: "n-1'th value: the one just emitted"
  consume :tictoc, Integer, doc: "input to drive flow"
  consume :n_2,    Integer, doc: "n-2'nd value: the one before the one just emitted"

  # calls super, then starts with no queued records
  def initialize(*)
    super
    # a queue springs into existence the first time its channel is touched
    @queues = Hash.new { |pending, channel| pending[channel] = [] }
  end

  # store the record under its channel; emit a batch if one is complete
  def process_input(channel, rec)
    queues[channel].push(rec)
    return unless ready?
    emit(next_item)
  end

  # takes the oldest record off every queue
  def next_item
    queues.values.map(&:shift)
  end

  # true if no input slot's queue is empty
  def ready?
    inslots.values.none? { |slot| queues[slot.name].empty? }
  end
end
|
70
|
+
|
71
|
+
# Declares the :fibonacci_series chain: a batcher (:feedback) feeds a summing
# map (:summer), whose result (:fibonacci_n) is routed to the output and back
# into the batcher's inputs.
Wukong.chain(:fibonacci_series) do

  # delay stage holding one record, registered under the name :my_delay
  delay_buffer(1, name: :my_delay)

  # * I don't want to have to name everything
  #   - are few/some/most things named?
  # * I must be able to have the same stage type on a graph more than once
  # * If naming things is a general case, I want it to
  #   - be clean, and for it to
  #   - not cause a ruckus when stage type has its own args
  #

  # main pipeline: batch the inputs, sum each batch, pass the sum on
  batcher(name: :feedback) >
    map(name: :summer, &:sum) >
    many_to_many(name: :fibonacci_n)

  # the ticker feeds the batcher's :tictoc input
  spew(6, item: 0, name: :ticker) > feedback.tictoc

  fibonacci_n > feedback.n_1
  fibonacci_n > output
  # NOTE(review): the delay stage above is named :my_delay, yet this line
  # refers to :delay -- confirm which name the DSL resolves here.
  fibonacci_n > :delay > feedback.n_2

  # preload the feedback buffer
  feedback.n_1.process(0)
  feedback.n_2.process(0)
  feedback.n_2.process(1)
end
|
98
|
+
|
99
|
+
# Wukong.dataflow(:dump) do
|
100
|
+
#   stdout << Wukong.dataflow(:fibonacci_series).out
|
101
|
+
# end
|