alf 0.9.3 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +255 -129
- data/Gemfile +31 -1
- data/Gemfile.lock +17 -20
- data/LICENCE.md +1 -1
- data/Manifest.txt +2 -0
- data/README.md +37 -43
- data/TODO.md +1 -1
- data/alf.gemspec +10 -7
- data/alf.noespec +24 -13
- data/bin/alf +2 -2
- data/doc/commands/exec.md +16 -0
- data/doc/commands/help.md +11 -0
- data/doc/commands/main.md +33 -0
- data/doc/commands/show.md +19 -0
- data/doc/operators/non_relational/autonum.md +23 -0
- data/doc/operators/non_relational/clip.md +31 -0
- data/doc/operators/non_relational/coerce.md +15 -0
- data/doc/operators/non_relational/compact.md +20 -0
- data/doc/operators/non_relational/defaults.md +32 -0
- data/doc/operators/non_relational/generator.md +20 -0
- data/doc/operators/non_relational/sort.md +24 -0
- data/doc/operators/relational/extend.md +18 -0
- data/doc/operators/relational/group.md +27 -0
- data/doc/operators/relational/intersect.md +13 -0
- data/doc/operators/relational/join.md +27 -0
- data/doc/operators/relational/matching.md +20 -0
- data/doc/operators/relational/minus.md +12 -0
- data/doc/operators/relational/not-matching.md +20 -0
- data/doc/operators/relational/project.md +28 -0
- data/doc/operators/relational/quota.md +21 -0
- data/doc/operators/relational/rank.md +27 -0
- data/doc/operators/relational/rename.md +17 -0
- data/doc/operators/relational/restrict.md +25 -0
- data/doc/operators/relational/summarize.md +25 -0
- data/doc/operators/relational/ungroup.md +20 -0
- data/doc/operators/relational/union.md +14 -0
- data/doc/operators/relational/unwrap.md +20 -0
- data/doc/operators/relational/wrap.md +24 -0
- data/examples/csv/suppliers.csv +6 -0
- data/examples/logs/access.log +1000 -0
- data/examples/logs/combined.alf +2 -0
- data/examples/logs/hits.alf +14 -0
- data/examples/logs/not_found.alf +7 -0
- data/examples/logs/robots-cheating.alf +11 -0
- data/examples/logs/robots.alf +8 -0
- data/examples/northwind/customers.csv +92 -0
- data/examples/northwind/northwind.db +0 -0
- data/examples/northwind/orders.csv +831 -0
- data/examples/operators/clip.alf +1 -1
- data/examples/operators/database.alf +5 -6
- data/examples/operators/defaults.alf +1 -1
- data/examples/operators/group.alf +1 -1
- data/examples/operators/project.alf +2 -1
- data/examples/operators/pseudo-with.alf +2 -2
- data/examples/operators/quota.alf +2 -2
- data/examples/operators/summarize.alf +2 -2
- data/lib/alf/aggregator/aggregators.rb +77 -0
- data/lib/alf/aggregator/base.rb +95 -0
- data/lib/alf/aggregator/class_methods.rb +57 -0
- data/lib/alf/buffer/sorted.rb +48 -0
- data/lib/alf/command/class_methods.rb +27 -0
- data/lib/alf/command/doc_manager.rb +72 -0
- data/lib/alf/command/exec.rb +12 -0
- data/lib/alf/command/help.rb +31 -0
- data/lib/alf/command/main.rb +146 -0
- data/lib/alf/command/show.rb +33 -0
- data/lib/alf/environment/base.rb +37 -0
- data/lib/alf/environment/class_methods.rb +93 -0
- data/lib/alf/environment/explicit.rb +38 -0
- data/lib/alf/environment/folder.rb +62 -0
- data/lib/alf/extra/csv.rb +104 -0
- data/lib/alf/extra/logs.rb +100 -0
- data/lib/alf/extra/sequel.rb +77 -0
- data/lib/alf/{yaml.rb → extra/yaml.rb} +0 -0
- data/lib/alf/extra.rb +5 -0
- data/lib/alf/iterator/base.rb +38 -0
- data/lib/alf/iterator/class_methods.rb +22 -0
- data/lib/alf/iterator/proxy.rb +33 -0
- data/lib/alf/lispy/instance_methods.rb +157 -0
- data/lib/alf/operator/base.rb +74 -0
- data/lib/alf/operator/binary.rb +32 -0
- data/lib/alf/operator/cesure.rb +45 -0
- data/lib/alf/operator/class_methods.rb +132 -0
- data/lib/alf/operator/experimental.rb +9 -0
- data/lib/alf/operator/non_relational/autonum.rb +24 -0
- data/lib/alf/operator/non_relational/clip.rb +20 -0
- data/lib/alf/operator/non_relational/coerce.rb +21 -0
- data/lib/alf/operator/non_relational/compact.rb +62 -0
- data/lib/alf/operator/non_relational/defaults.rb +25 -0
- data/lib/alf/operator/non_relational/generator.rb +38 -0
- data/lib/alf/operator/non_relational/sort.rb +23 -0
- data/lib/alf/operator/nullary.rb +20 -0
- data/lib/alf/operator/relational/extend.rb +24 -0
- data/lib/alf/operator/relational/group.rb +32 -0
- data/lib/alf/operator/relational/intersect.rb +37 -0
- data/lib/alf/operator/relational/join.rb +106 -0
- data/lib/alf/operator/relational/matching.rb +45 -0
- data/lib/alf/operator/relational/minus.rb +37 -0
- data/lib/alf/operator/relational/not_matching.rb +45 -0
- data/lib/alf/operator/relational/project.rb +22 -0
- data/lib/alf/operator/relational/quota.rb +51 -0
- data/lib/alf/operator/relational/rank.rb +55 -0
- data/lib/alf/operator/relational/rename.rb +19 -0
- data/lib/alf/operator/relational/restrict.rb +20 -0
- data/lib/alf/operator/relational/summarize.rb +83 -0
- data/lib/alf/operator/relational/ungroup.rb +25 -0
- data/lib/alf/operator/relational/union.rb +32 -0
- data/lib/alf/operator/relational/unwrap.rb +21 -0
- data/lib/alf/operator/relational/wrap.rb +22 -0
- data/lib/alf/operator/shortcut.rb +53 -0
- data/lib/alf/operator/signature.rb +262 -0
- data/lib/alf/operator/transform.rb +27 -0
- data/lib/alf/operator/unary.rb +38 -0
- data/lib/alf/reader/alf_file.rb +24 -0
- data/lib/alf/reader/base.rb +119 -0
- data/lib/alf/reader/class_methods.rb +82 -0
- data/lib/alf/reader/rash.rb +28 -0
- data/lib/alf/relation/class_methods.rb +37 -0
- data/lib/alf/relation/instance_methods.rb +127 -0
- data/lib/alf/renderer/base.rb +72 -0
- data/lib/alf/renderer/class_methods.rb +58 -0
- data/lib/alf/renderer/rash.rb +19 -0
- data/lib/alf/{text.rb → renderer/text.rb} +1 -1
- data/lib/alf/tools/coerce.rb +14 -0
- data/lib/alf/tools/miscellaneous.rb +77 -0
- data/lib/alf/tools/to_lispy.rb +99 -0
- data/lib/alf/tools/to_ruby_literal.rb +14 -0
- data/lib/alf/tools/tuple_handle.rb +50 -0
- data/lib/alf/types/attr_list.rb +56 -0
- data/lib/alf/types/attr_name.rb +28 -0
- data/lib/alf/types/boolean.rb +12 -0
- data/lib/alf/types/heading.rb +96 -0
- data/lib/alf/types/ordering.rb +93 -0
- data/lib/alf/types/renaming.rb +57 -0
- data/lib/alf/types/summarization.rb +76 -0
- data/lib/alf/types/tuple_computation.rb +61 -0
- data/lib/alf/types/tuple_expression.rb +61 -0
- data/lib/alf/types/tuple_predicate.rb +49 -0
- data/lib/alf/version.rb +2 -2
- data/lib/alf.rb +193 -3714
- data/spec/integration/__database__/group.alf +1 -1
- data/spec/integration/__database__/suppliers_csv.csv +6 -0
- data/spec/integration/command/alf/alf.db +0 -0
- data/spec/integration/command/alf/alf_env_sqlite.cmd +1 -0
- data/spec/integration/command/alf/alf_env_sqlite.stdout +9 -0
- data/spec/integration/command/alf/alf_help.cmd +1 -0
- data/spec/integration/command/alf/alf_help.stdout +67 -0
- data/spec/integration/command/autonum/autonum_0.cmd +1 -1
- data/spec/integration/command/coerce/coerce_1.cmd +1 -0
- data/spec/integration/command/coerce/coerce_1.stdout +5 -0
- data/spec/integration/command/defaults/defaults_0.cmd +1 -1
- data/spec/integration/command/defaults/defaults_0.stdout +9 -9
- data/spec/integration/command/defaults/defaults_2.cmd +1 -0
- data/spec/integration/command/defaults/defaults_2.stdout +9 -0
- data/spec/integration/command/generator/generator_1.cmd +1 -0
- data/spec/integration/command/generator/generator_1.stdout +10 -0
- data/spec/integration/command/generator/generator_2.cmd +1 -0
- data/spec/integration/command/generator/generator_2.stdout +5 -0
- data/spec/integration/command/generator/generator_3.cmd +1 -0
- data/spec/integration/command/generator/generator_3.stdout +5 -0
- data/spec/integration/command/group/group_0.cmd +1 -1
- data/spec/integration/command/group/group_1.cmd +1 -1
- data/spec/integration/command/help/help_1.cmd +1 -0
- data/spec/integration/command/help/help_1.stdout +22 -0
- data/spec/integration/command/quota/quota_0.cmd +1 -1
- data/spec/integration/command/rank/rank_1.cmd +1 -1
- data/spec/integration/command/rank/rank_1.stdout +10 -10
- data/spec/integration/command/rank/rank_2.cmd +1 -1
- data/spec/integration/command/rank/rank_2.stdout +10 -10
- data/spec/integration/command/rank/rank_3.cmd +1 -1
- data/spec/integration/command/rank/rank_3.stdout +10 -10
- data/spec/integration/command/rank/rank_4.cmd +1 -1
- data/spec/integration/command/rank/rank_5.cmd +1 -1
- data/spec/integration/command/show/show_csv.cmd +1 -0
- data/spec/integration/command/show/show_csv.stdout +6 -0
- data/spec/integration/command/show/show_rash_2.cmd +1 -1
- data/spec/integration/command/show/show_rash_2.stdout +5 -5
- data/spec/integration/command/sort/sort_0.cmd +1 -1
- data/spec/integration/command/sort/sort_1.cmd +1 -1
- data/spec/integration/command/sort/sort_1.stdout +2 -2
- data/spec/integration/command/sort/sort_2.cmd +1 -0
- data/spec/integration/command/sort/sort_2.stdout +9 -0
- data/spec/integration/command/sort/sort_3.cmd +1 -0
- data/spec/integration/command/sort/sort_3.stdout +9 -0
- data/spec/integration/command/summarize/summarize_0.cmd +1 -1
- data/spec/integration/command/ungroup/ungroup_0.cmd +1 -1
- data/spec/integration/command/wrap/wrap_0.cmd +1 -1
- data/spec/integration/semantics/test_project.alf +5 -6
- data/spec/integration/semantics/test_rank.alf +16 -16
- data/spec/integration/test_command.rb +17 -6
- data/spec/integration/test_examples.rb +1 -1
- data/spec/regression/logs/apache_combined.log +5 -0
- data/spec/regression/logs/test_path_attribute.rb +25 -0
- data/spec/regression/relation/test_relation_allbut_all.rb +14 -0
- data/spec/shared/an_operator_class.rb +10 -5
- data/spec/spec_helper.rb +1 -7
- data/spec/unit/assumptions/test_set.rb +64 -0
- data/spec/unit/command/doc_manager/dynamic.md +1 -0
- data/spec/unit/command/doc_manager/example.md +1 -0
- data/spec/unit/command/doc_manager/example_1.txt +11 -0
- data/spec/unit/command/doc_manager/static.md +1 -0
- data/spec/unit/command/doc_manager/test_call.rb +49 -0
- data/spec/unit/csv/input.csv +3 -0
- data/spec/unit/csv/test_reader.rb +66 -0
- data/spec/unit/csv/test_renderer.rb +73 -0
- data/spec/unit/lispy/test_relation.rb +37 -0
- data/spec/unit/lispy/test_run.rb +40 -0
- data/spec/unit/lispy/test_tuple.rb +36 -0
- data/spec/unit/logs/apache_combined.log +5 -0
- data/spec/unit/logs/postgresql.log +29 -0
- data/spec/unit/logs/test_reader.rb +56 -0
- data/spec/unit/operator/non_relational/compact/{buffer_based.rb → test_buffer_based.rb} +0 -0
- data/spec/unit/operator/non_relational/test_clip.rb +1 -1
- data/spec/unit/operator/non_relational/test_coerce.rb +35 -0
- data/spec/unit/operator/non_relational/test_defaults.rb +15 -2
- data/spec/unit/operator/non_relational/test_generator.rb +78 -0
- data/spec/unit/operator/relational/join/test_hash_based.rb +4 -4
- data/spec/unit/operator/relational/matching/test_hash_based.rb +6 -6
- data/spec/unit/operator/relational/not_matching/test_hash_based.rb +4 -4
- data/spec/unit/operator/relational/summarize/test_hash_based.rb +10 -6
- data/spec/unit/operator/relational/summarize/test_sort_based.rb +18 -7
- data/spec/unit/operator/relational/test_group.rb +8 -8
- data/spec/unit/operator/relational/test_intersect.rb +3 -3
- data/spec/unit/operator/relational/test_minus.rb +3 -3
- data/spec/unit/operator/relational/test_project.rb +12 -2
- data/spec/unit/operator/relational/test_quota.rb +5 -6
- data/spec/unit/operator/relational/test_summarize.rb +9 -11
- data/spec/unit/operator/relational/test_union.rb +1 -1
- data/spec/unit/operator/relational/test_wrap.rb +1 -1
- data/spec/unit/operator/signature/test_collect_on.rb +45 -0
- data/spec/unit/operator/signature/test_initialize.rb +17 -0
- data/spec/unit/operator/signature/test_install.rb +56 -0
- data/spec/unit/operator/signature/test_option_parser.rb +36 -0
- data/spec/unit/operator/signature/test_parse_args.rb +60 -0
- data/spec/unit/operator/signature/test_parse_argv.rb +87 -0
- data/spec/unit/operator/signature/test_to_lispy.rb +102 -0
- data/spec/unit/operator/signature/test_to_shell.rb +103 -0
- data/spec/unit/operator/test_non_relational.rb +3 -1
- data/spec/unit/relation/test_relops.rb +20 -15
- data/spec/unit/sequel/alf.db +0 -0
- data/spec/unit/sequel/test_environment.rb +54 -0
- data/spec/unit/test_aggregator.rb +32 -22
- data/spec/unit/test_environment.rb +5 -0
- data/spec/unit/test_lispy.rb +4 -0
- data/spec/unit/test_relation.rb +5 -0
- data/spec/unit/text/test_cell.rb +6 -6
- data/spec/unit/text/test_row.rb +3 -3
- data/spec/unit/text/test_table.rb +6 -6
- data/spec/unit/tools/test_coalesce.rb +15 -0
- data/spec/unit/tools/test_coerce.rb +10 -0
- data/spec/unit/tools/test_to_lispy.rb +138 -0
- data/spec/unit/tools/test_to_ruby_literal.rb +10 -0
- data/spec/unit/tools/test_tuple_handle.rb +1 -59
- data/spec/unit/types/test_attr_list.rb +106 -0
- data/spec/unit/types/test_attr_name.rb +52 -0
- data/spec/unit/{test_heading.rb → types/test_heading.rb} +10 -0
- data/spec/unit/types/test_ordering.rb +127 -0
- data/spec/unit/types/test_renaming.rb +55 -0
- data/spec/unit/types/test_summarization.rb +63 -0
- data/spec/unit/types/test_tuple_computation.rb +60 -0
- data/spec/unit/types/test_tuple_expression.rb +64 -0
- data/spec/unit/types/test_tuple_predicate.rb +79 -0
- data/tasks/debug_mail.rake +1 -1
- data/tasks/debug_mail.txt +5 -0
- data/tasks/gh-pages.rake +63 -0
- metadata +325 -52
- data/spec/unit/operator/test_command_methods.rb +0 -38
- data/spec/unit/tools/test_ordering_key.rb +0 -94
- data/spec/unit/tools/test_parse_commandline_args.rb +0 -47
- data/spec/unit/tools/test_projection_key.rb +0 -83
@@ -0,0 +1,37 @@
|
|
1
|
+
module Alf
  class Environment
    #
    # Instance-level contract of an Environment; mixed into Environment
    # itself at the bottom of this file.
    #
    module Base

      #
      # Resolves a dataset from its name.
      #
      # Implementations are expected to map the name to a tuple enumerable:
      # an Iterator (typically a Reader instance) when the dataset exists,
      # a NoSuchDatasetError otherwise.
      #
      # The empty definition below only carries this documentation; it is
      # undefined right away, so this module provides no actual
      # implementation of the method.
      #
      # @param [Symbol] name the name of a dataset
      # @return [Iterator] an iterator, typically a Reader instance
      # @raise [NoSuchDatasetError] when the dataset does not exist
      #
      def dataset(name); end
      undef_method(:dataset)

      #
      # Branches this environment, layering additional explicit definitions
      # on top of it.
      #
      # Provided for (with ...) expressions; subclasses should not override
      # it.
      #
      # @param [Hash] defs a set of (name, Iterator) pairs
      # @return [Environment] an Explicit environment holding `defs` and
      #         built with this environment as second argument
      #
      def branch(defs)
        Explicit.new(defs, self)
      end

    end # module Base
    include Base
  end # class Environment
end # module Alf
|
@@ -0,0 +1,93 @@
|
|
1
|
+
module Alf
  class Environment
    #
    # Class-level API of Environment: registry of environment kinds,
    # auto-detection and default instances.
    #
    module ClassMethods

      #
      # Returns the registered environments, as an array of
      # [name, class] pairs in registration order.
      #
      def environments
        @environments ||= []
      end

      #
      # Registers an environment class under a specific name.
      #
      # Besides recording the pair, this installs a factory method called
      # `name` on the receiver; it forwards its arguments to `clazz.new`.
      #
      # Registered classes must implement recognizes?, taking an array of
      # arguments and returning true only when an instance can be built
      # from them. Be very specific before returning true. See also
      # autodetect and recognizes?
      #
      # @param [Symbol] name name of the environment kind
      # @param [Class] clazz class implementing the environment
      #
      def register(name, clazz)
        environments << [name, clazz]
        metaclass = class << self; self; end
        metaclass.send(:define_method, name) { |*args| clazz.new(*args) }
      end

      #
      # Auto-detects the environment to use for specific arguments.
      #
      # A single argument that already is an Environment is returned as is.
      # Otherwise registered classes are tried in registration order and the
      # first one answering true to recognizes?(args) gets instantiated.
      #
      # @return [Environment] an environment instance
      # @raise [ArgumentError] when no registered class recognizes the arguments
      #
      def autodetect(*args)
        return args.first if (args.size == 1) && args.first.is_a?(Environment)
        environments.each do |_name, clazz|
          return clazz.new(*args) if clazz.recognizes?(args)
        end
        raise ArgumentError, "Unable to auto-detect Environment with #{args.inspect}"
      end

      #
      # (see Environment.autodetect)
      #
      def coerce(*args)
        autodetect(*args)
      end

      #
      # Returns true if _args_ can be used for building an environment
      # instance, false otherwise. This default always answers false.
      #
      # When returning true, an immediate invocation of new(*args) should
      # succeed. Runtime exceptions are admitted (no such database, for
      # example) but argument errors should not occur (missing argument,
      # wrong typing, etc.).
      #
      # Please be specific when implementing this extension point:
      # registered environments form a chain and each of them should have a
      # chance of being selected.
      #
      def recognizes?(args)
        false
      end

      #
      # Returns the default environment, i.e. the examples one.
      #
      def default
        examples
      end

      #
      # Returns the examples environment, built through the `folder`
      # factory on the examples/operators directory located four levels
      # above this file.
      #
      def examples
        folder(File.expand_path('../../../../examples/operators', __FILE__))
      end

    end # module ClassMethods
    extend ClassMethods
  end # class Environment
end # module Alf
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Alf
  class Environment
    #
    # Environment working on explicitly defined datasources, with support
    # for branching and unbranching.
    #
    class Explicit < Environment

      #
      # Creates an environment instance.
      #
      # @param [Hash] defs initial (name, dataset) definitions
      # @param [Environment] child optional environment consulted when a
      #        name is not found in `defs`
      #
      def initialize(defs = {}, child = nil)
        @defs, @child = defs, child
      end

      #
      # Unbranches this environment.
      #
      # @return [Environment] the child environment, nil when there is none
      #
      def unbranch
        @child
      end

      #
      # (see Environment#dataset)
      #
      # Local definitions win over the child environment. An unknown name
      # without child raises a plain RuntimeError.
      # NOTE(review): Environment#dataset documents NoSuchDatasetError for
      # this case -- confirm which error callers expect.
      #
      def dataset(name)
        return @defs[name] if @defs.has_key?(name)
        return @child.dataset(name) if @child
        raise "No such dataset #{name}"
      end

    end # class Explicit
  end # class Environment
end # module Alf
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module Alf
  class Environment
    #
    # Specialization of Environment to work on files of a given folder.
    #
    # Datasets are resolved by looking at files in that folder and are
    # served as reader instances obtained through Reader.reader.
    #
    class Folder < Environment

      #
      # (see Environment.recognizes?)
      #
      # Returns true if args contains only a String which is an existing
      # folder.
      #
      def self.recognizes?(args)
        (args.size == 1) &&
          args.first.is_a?(String) &&
          File.directory?(args.first.to_s)
      end

      #
      # Creates an environment instance, wired to the specified folder.
      #
      # @param [String] folder path to the folder to use as dataset source
      #
      def initialize(folder)
        @folder = folder
      end

      #
      # (see Environment#dataset)
      #
      # Raises a plain RuntimeError when no file can be found for `name`.
      #
      def dataset(name)
        if file = find_file(name)
          Reader.reader(file, self)
        else
          raise "No such dataset #{name} (#{@folder})"
        end
      end

      protected

      #
      # Finds the file backing the dataset called `name`.
      #
      # Lookup order: `name` taken as a path of its own, then a regular
      # file called `name` inside the folder, then the first regular file
      # of the folder matching "name.*".
      #
      # File.exist? is used on purpose: the File.exists? alias was
      # deprecated and is no longer available on recent Rubies.
      #
      # @param [Symbol, String] name a dataset name
      # @return [String, nil] path to the file, nil if none is found
      #
      def find_file(name)
        # TODO: refactor this, because it allows getting out of the folder
        if File.exist?(name.to_s)
          name.to_s
        elsif File.file?(explicit = File.join(@folder, name.to_s))
          explicit
        else
          Dir[File.join(@folder, "#{name}.*")].find do |f|
            File.file?(f)
          end
        end
      end

      Environment.register(:folder, self)
    end # class Folder
  end # class Environment
end # module Alf
|
@@ -0,0 +1,104 @@
|
|
1
|
+
module Alf
  module CSV

    #
    # Tooling shared by the CSV renderer and reader. Including classes
    # must respond to `options`.
    #
    module Commons

      DEFAULT_OPTIONS = {
        :headers => true
      }

      private

      #
      # Returns the CSV implementation to use, requiring it on the way:
      # FasterCSV for ruby < 1.9, the standard CSV class otherwise.
      #
      def get_csv_class
        if RUBY_VERSION < "1.9"
          ::Alf::Tools::friendly_require('fastercsv')
          ::FasterCSV
        else
          require 'csv'
          ::CSV
        end
      end

      #
      # Returns a CSV instance bound to `io`, configured with `options`.
      #
      # NOTE(review): options are passed as a positional Hash; recent csv
      # releases appear to declare them as keyword arguments -- confirm
      # against the Ruby versions that must be supported.
      #
      def get_csv(io)
        get_csv_class.new(io, options)
      end

    end # module Commons

    #
    # Implements Alf::Renderer contract for outputting CSV files.
    #
    class Renderer < Alf::Renderer
      include CSV::Commons

      protected

      #
      # (see Renderer#render)
      #
      # The header line is taken from the keys of the first tuple and is
      # written just before it; nothing is written for an empty input.
      #
      def render(input, output)
        csv    = get_csv(output)
        header = nil
        input.each do |tuple|
          header ||= begin
            names = extract_header(tuple)
            csv << names.map{|k| k.to_s}
            names
          end
          csv << extract_row(tuple, header)
        end
        output
      end

      private

      # Returns the attribute names of `tuple`
      def extract_header(tuple)
        tuple.keys
      end

      # Returns the values of `tuple`, in `header` order
      def extract_row(tuple, header)
        header.map{|k| tuple[k]}
      end

      ::Alf::Renderer.register(:csv, "as a csv file", self)
    end # class Renderer

    #
    # Implements Alf::Reader contract for reading CSV files.
    #
    class Reader < Alf::Reader
      include CSV::Commons

      #
      # Yields each non-header row as a tuple with Symbol keys.
      #
      # A StringIO input is parsed from its whole string; any other io is
      # wrapped in a CSV instance and iterated.
      #
      def each
        with_input_io do |io|
          handler = Proc.new do |row|
            next if row.header_row?
            yield(symbolize_keys(row.to_hash))
          end
          if io.is_a?(StringIO)
            get_csv_class.parse(io.string, options, &handler)
          else
            get_csv(io).each(&handler)
          end
        end
      end

      private

      # Returns a copy of `h` whose keys are converted with to_sym
      def symbolize_keys(h)
        symbolized = {}
        h.each{|k,v| symbolized[k.to_sym] = v }
        symbolized
      end

      ::Alf::Reader.register(:csv, [".csv"], self)
    end # class Reader

  end # module CSV
end # module Alf
|
@@ -0,0 +1,100 @@
|
|
1
|
+
module Alf
  module Logs

    #
    # Implements Alf::Reader contract for reading log files, relying on
    # the request_log_analyzer library for parsing.
    #
    class Reader < Alf::Reader

      DEFAULT_OPTIONS = {
        :file_format => nil,
        :line_def    => :access
      }

      # Maps request_log_analyzer capture types to ruby classes
      LOG_TYPES = {
        # RequestLogAnalyzer::Request::Converters
        :string    => String,
        :float     => Float,
        :decimal   => Float,
        :int       => Integer,
        :integer   => Integer,
        :sym       => Symbol,
        :symbol    => Symbol,
        :timestamp => Integer,
        :traffic   => Integer,
        :duration  => Float,
        :epoch     => Integer,
        # AmazonS3
        :nillable_string => String,
        :referer         => String,
        :user_agent      => String,
        # Apache
        :path            => String,
        # MySQL
        :sql             => String
      }

      attr_reader :options

      #
      # Creates a reader instance. Requires request_log_analyzer first,
      # delegates to the superclass, then coerces the :file_format option.
      #
      def initialize(*args)
        Alf::Tools::friendly_require('request_log_analyzer')
        super(*args)
        coerced = coerce_file_format(@options[:file_format])
        @options[:file_format] = coerced
      end

      #
      # Yields one tuple per request parsed from the input.
      #
      def each
        parser = infer_parser(input_path)
        with_input_io do |io|
          parser.parse_stream(io){|req| yield request_to_tuple(req) }
        end
      end

      private

      #
      # Coerces a :file_format option: nil and FileFormat instances are
      # kept as is, a Symbol or an Array is loaded through FileFormat.load.
      #
      # @raise [ArgumentError] on any other kind of value
      #
      def coerce_file_format(file_format)
        case file_format
        when NilClass, RequestLogAnalyzer::FileFormat
          file_format
        when Symbol
          RequestLogAnalyzer::FileFormat.load(file_format)
        when Array
          RequestLogAnalyzer::FileFormat.load(*file_format)
        else
          raise ArgumentError, "Invalid file format: #{file_format}"
        end
      end

      #
      # Builds a log parser, using the configured file format if any and
      # auto-detecting it from `path` otherwise.
      #
      # @raise [NotImplementedError] when no format is configured and no
      #        path is available
      #
      def infer_parser(path)
        file_format = @options[:file_format]
        unless file_format
          unless path
            raise NotImplementedError, "Logs::Reader does not work on IO for now"
          end
          file_format = RequestLogAnalyzer::FileFormat.autodetect(path)
        end
        RequestLogAnalyzer::Source::LogParser.new(file_format)
      end

      # Converts a parsed request to a tuple, i.e. its attributes
      def request_to_tuple(req)
        req.attributes
      end

      #
      # Builds a heading from the captures of a line definition of
      # `format`, mapping each capture name to a ruby class.
      #
      def infer_heading(format, line_def = :access)
        captures = format.line_definitions[line_def].captures
        pairs = captures.map{|capt| [ capt[:name], to_type(capt[:type]) ] }
        Alf::Heading.new(Hash[pairs])
      end

      # Returns the ruby class of a log type, String when unknown
      def to_type(log_type)
        LOG_TYPES[log_type] || String
      end

      ::Alf::Reader.register(:logs, [".log"], self)
    end # class Reader

  end # module Logs
end # module Alf
|
@@ -0,0 +1,77 @@
|
|
1
|
+
module Alf
  module Sequel

    #
    # Specialization of Alf::Environment to distribute Sequel datasets
    #
    class Environment < ::Alf::Environment

      #
      # (see Alf::Environment.recognizes?)
      #
      # Returns true if args contains one String that either parses as an
      # uri having a scheme, or looks like a sqlite file. Strings that do
      # not parse as an uri are not recognized.
      #
      def self.recognizes?(args)
        require 'uri'
        return false unless (args.size == 1) && args.first.is_a?(String)
        uri = URI::parse(args.first)
        if uri.scheme || looks_a_sqlite_file?(args.first)
          true
        else
          false
        end
      rescue ::URI::Error
        false
      end

      #
      # Returns true if `f` is an existing file whose extension is ".db"
      #
      def self.looks_a_sqlite_file?(f)
        File.file?(f) && (File.extname(f) == ".db")
      end

      #
      # Creates an Environment instance
      #
      # @param [String] uri a database uri, or the path to a sqlite file,
      #        which is then turned into a sqlite:// uri
      # @param [Hash] options options passed to Sequel.connect
      #
      def initialize(uri, options = {})
        @uri = self.class.looks_a_sqlite_file?(uri) ? "sqlite://#{uri}" : uri
        @options = options
      end

      #
      # (see Alf::Environment#dataset)
      #
      def dataset(name)
        Iterator.new(connect[name])
      end

      private

      #
      # Returns the database connection, created on first call and reused
      # afterwards. The sequel library is required here, not at load time.
      #
      def connect
        Alf::Tools::friendly_require('sequel')
        @db ||= ::Sequel.connect(@uri, @options)
      end

      ::Alf::Environment.register(:sequel, self)
    end # class Environment

    #
    # Specialization of Alf::Iterator to work on a Sequel dataset
    #
    class Iterator
      include ::Alf::Iterator

      #
      # Creates an iterator on `dataset`
      #
      def initialize(dataset)
        @dataset = dataset
      end

      #
      # (see Alf::Iterator#each)
      #
      # The caller's block is captured explicitly and forwarded to the
      # dataset. Capturing it through a block-less Proc.new is rejected by
      # recent Rubies.
      #
      def each(&block)
        @dataset.each(&block)
      end

      #
      # (see Alf::Iterator#pipe)
      #
      # Both arguments are ignored, the dataset being wired at construction.
      #
      def pipe(input, env = nil)
        self
      end

    end # class Iterator

  end # module Sequel
end # module Alf
|
File without changes
|
data/lib/alf/extra.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
module Alf
  module Iterator
    #
    # Instance-level contract of an Iterator; mixed into Iterator itself at
    # the bottom of this file.
    #
    module Base

      #
      # Wires the iterator input and an optional execution environment.
      #
      # Iterators (typically Reader and Operator instances) work from input
      # data coming from files, other operators, and so on. This method
      # wires that input to the iterator and is required before any call to
      # each, unless autowiring occurs at construction. The exact kind of
      # input object is left at the discretion of implementations.
      #
      # The definition below only carries this documentation; it is
      # undefined right away, so this module provides no actual
      # implementation of the method.
      #
      # @param [Object] input the iterator input, at discretion of the
      #        Iterator implementation.
      # @param [Environment] environment an optional environment for
      #        resolving named datasets if needed.
      # @return [Object] self
      #
      def pipe(input, environment = nil)
        self
      end
      undef_method(:pipe)

      #
      # Converts this iterator to an in-memory Relation, by coercion
      # through Relation.coerce.
      #
      # @return [Relation] a relation instance, as the set of tuples
      #         that would be yielded by this iterator.
      #
      def to_rel
        Relation.coerce(self)
      end

    end # module Base
    include Base
  end # module Iterator
end # module Alf
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module Alf
  module Iterator
    #
    # Class-level API of Iterator.
    #
    module ClassMethods

      #
      # Coerces something to an iterator.
      #
      # Iterators and Arrays are returned untouched. A String or a Symbol
      # is taken as a dataset name and wrapped in a Proxy bound to
      # `environment`. Anything else is delegated to Reader.coerce.
      #
      # @param [Object] arg the value to coerce
      # @param [Environment] environment an optional environment
      #
      def coerce(arg, environment = nil)
        return arg if arg.is_a?(Iterator) || arg.is_a?(Array)
        if arg.is_a?(String) || arg.is_a?(Symbol)
          return Proxy.new(environment, arg.to_sym)
        end
        Reader.coerce(arg, environment)
      end

    end # module ClassMethods
    extend ClassMethods
  end # module Iterator
end # module Alf
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module Alf
  module Iterator
    #
    # Iterator standing for a named dataset; the dataset is only requested
    # from the environment when each is called.
    #
    class Proxy
      include Iterator

      # @return [Symbol] name of the dataset to request to environment
      attr_reader :dataset

      #
      # Creates a proxy instance.
      #
      # @param [Environment] env the environment serving iterator instances
      # @param [Symbol] dataset named dataset to rely on
      #
      def initialize(env, dataset)
        @environment = env
        @dataset     = dataset
      end

      #
      # (see Iterator#pipe)
      #
      # Only fills what is still missing: an environment or dataset name
      # already set is never replaced.
      #
      def pipe(input, environment = nil)
        @environment = environment unless @environment
        @dataset     = input       unless @dataset
        self
      end

      #
      # (see Iterator#each)
      #
      # Asks the environment for the dataset and forwards the block to it.
      #
      def each(&block)
        source = @environment.dataset(@dataset)
        source.each(&block)
      end

    end # class Proxy
  end # module Iterator
end # module Alf
|