opener-tokenizer-base 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +148 -0
  3. data/bin/tokenizer-base +5 -0
  4. data/bin/tokenizer-de +5 -0
  5. data/bin/tokenizer-en +5 -0
  6. data/bin/tokenizer-es +5 -0
  7. data/bin/tokenizer-fr +5 -0
  8. data/bin/tokenizer-it +5 -0
  9. data/bin/tokenizer-nl +5 -0
  10. data/core/lib/Data/OptList.pm +256 -0
  11. data/core/lib/Params/Util.pm +866 -0
  12. data/core/lib/Sub/Exporter.pm +1101 -0
  13. data/core/lib/Sub/Exporter/Cookbook.pod +309 -0
  14. data/core/lib/Sub/Exporter/Tutorial.pod +280 -0
  15. data/core/lib/Sub/Exporter/Util.pm +354 -0
  16. data/core/lib/Sub/Install.pm +329 -0
  17. data/core/lib/Time/Stamp.pm +808 -0
  18. data/core/load-prefixes.pl +43 -0
  19. data/core/nonbreaking_prefixes/abbreviation_list.kaf +0 -0
  20. data/core/nonbreaking_prefixes/abbreviation_list.txt +444 -0
  21. data/core/nonbreaking_prefixes/nonbreaking_prefix.ca +533 -0
  22. data/core/nonbreaking_prefixes/nonbreaking_prefix.de +781 -0
  23. data/core/nonbreaking_prefixes/nonbreaking_prefix.el +448 -0
  24. data/core/nonbreaking_prefixes/nonbreaking_prefix.en +564 -0
  25. data/core/nonbreaking_prefixes/nonbreaking_prefix.es +758 -0
  26. data/core/nonbreaking_prefixes/nonbreaking_prefix.fr +1027 -0
  27. data/core/nonbreaking_prefixes/nonbreaking_prefix.is +697 -0
  28. data/core/nonbreaking_prefixes/nonbreaking_prefix.it +641 -0
  29. data/core/nonbreaking_prefixes/nonbreaking_prefix.nl +739 -0
  30. data/core/nonbreaking_prefixes/nonbreaking_prefix.pl +729 -0
  31. data/core/nonbreaking_prefixes/nonbreaking_prefix.pt +656 -0
  32. data/core/nonbreaking_prefixes/nonbreaking_prefix.ro +484 -0
  33. data/core/nonbreaking_prefixes/nonbreaking_prefix.ru +705 -0
  34. data/core/nonbreaking_prefixes/nonbreaking_prefix.sk +920 -0
  35. data/core/nonbreaking_prefixes/nonbreaking_prefix.sl +524 -0
  36. data/core/nonbreaking_prefixes/nonbreaking_prefix.sv +492 -0
  37. data/core/split-sentences.pl +114 -0
  38. data/core/text-fixer.pl +169 -0
  39. data/core/tokenizer-cli.pl +363 -0
  40. data/core/tokenizer.pl +145 -0
  41. data/lib/opener/tokenizers/base.rb +84 -0
  42. data/lib/opener/tokenizers/base/version.rb +8 -0
  43. data/opener-tokenizer-base.gemspec +25 -0
  44. metadata +134 -0
@@ -0,0 +1,1101 @@
1
+ use 5.006;
2
+ use strict;
3
+ use warnings;
4
+ package Sub::Exporter;
5
+ {
6
+ $Sub::Exporter::VERSION = '0.984';
7
+ }
8
+ # ABSTRACT: a sophisticated exporter for custom-built routines
9
+
10
+ use Carp ();
11
+ use Data::OptList 0.100 ();
12
+ use Params::Util 0.14 (); # _CODELIKE
13
+ use Sub::Install 0.92 ();
14
+
15
+
16
# Strip the group sigil from an import name.
#
# A group is requested by prefixing its name with "-" or ":".  When $name
# starts with one of those characters, the remainder of the string (the bare
# group name) is returned; otherwise nothing is returned (an empty list, or
# undef in scalar context).
sub _group_name {
  my ($name) = @_;

  my $sigil = substr $name, 0, 1;

  return substr($name, 1) if index(q{-:}, $sigil) >= 0;
  return;
}
24
+
25
# Replace every group in a canonical opt list with the exports it stands for.
#
#   $groups     - canonical opt list (arrayref of [ name => value ] pairs)
#                 holding exports and groups
#   $collection - collections gathered for this import
#   $seen       - groups already expanded on this path; those are skipped
#   $merge      - options inherited from the group being descended through
#
# Returns a new arrayref in which each group entry has been replaced by its
# expansion and each plain export has had the inherited options (including
# -prefix / -suffix) folded into it.
sub _expand_groups {
  my ($class, $config, $groups, $collection, $seen, $merge) = @_;
  $seen  = {} unless $seen;
  $merge = {} unless $merge;

  my @expanded = @$groups;

  # Walk from the end so that splicing in an expansion never shifts the
  # position of an entry we have yet to visit.
  my $idx = @expanded;
  while ($idx-- > 0) {
    my $entry = $expanded[$idx];

    if (_group_name($entry->[0])) {
      # Each group gets its own copy of the seen-set, which gives us
      # something like dynamic scoping for loop detection.
      my %seen_here = %$seen;

      my @members = _expand_group(
        $class, $config, $entry, $collection, \%seen_here, $merge,
      );
      splice @expanded, $idx, 1, @members;
      next;
    }

    # A plain export: with no inherited options it is left untouched.
    my %inherited = %$merge;
    next unless %inherited;

    my $prefix = delete $inherited{-prefix};
    my $suffix = delete $inherited{-suffix};
    $prefix = '' unless $prefix;
    $suffix = '' unless $suffix;

    if (
      Params::Util::_CODELIKE($entry->[1]) ## no critic Private
      or
      Params::Util::_SCALAR0($entry->[1])  ## no critic Private
    ) {
      # Entries produced by a group generator carry code (or a method name)
      # instead of options, so the rename is applied to the name itself.
      $entry->[0] = $prefix . $entry->[0] . $suffix;
      next;
    }

    # A reference in -as (install into a scalar) is passed through as is;
    # a string -as, or failing that the export's own name, gets decorated.
    my $as = $entry->[1]{-as};
    unless (ref $as) {
      $as = $prefix . ($as ? $as : $entry->[0]) . $suffix;
    }

    $entry->[1] = { %{ $entry->[1] }, %inherited, -as => $as };
  }

  return \@expanded;
}
69
+
70
# Expand a single group entry into the opt-list pairs it stands for.
#
#   $class      - the exporting class (the invocant of the import routine)
#   $config     - exporter configuration; $config->{groups} maps each group
#                 name to an export list, a group-generator coderef, or a
#                 reference to the name of a generator method
#   $group      - a [ name => value ] pair from an opt list; the name still
#                 carries its leading "-" or ":" sigil
#   $collection - collections gathered for this import
#   $seen       - hashref counting the groups already expanded on this path
#   $merge      - options inherited from the enclosing groups (optional)
#
# Returns the expanded pairs as a list, or nothing when the group has already
# been expanded on this path.  Croaks when the group is not configured or when
# a group generator returns something other than a hashref.
sub _expand_group {
  my ($class, $config, $group, $collection, $seen, $merge) = @_;
  $merge ||= {};

  my ($group_name, $group_arg) = @$group;
  $group_name = _group_name($group_name);

  Carp::croak qq(group "$group_name" is not exported by the $class module)
    unless exists $config->{groups}{$group_name};

  return if $seen->{$group_name}++;

  if (ref $group_arg) {
    # Work on a private copy: $merge is shared with the sibling entries our
    # caller is still iterating over, so deleting -prefix / -suffix from it
    # directly would silently strip them from those siblings.
    my %inherited = %$merge;

    # The outer prefix goes outermost on the left, the outer suffix outermost
    # on the right.
    my $prefix = ((delete $inherited{-prefix}) || '')
               . ($group_arg->{-prefix} || '');
    my $suffix = ($group_arg->{-suffix} || '')
               . ((delete $inherited{-suffix}) || '');

    $merge = {
      %inherited,
      %$group_arg,
      ($prefix ? (-prefix => $prefix) : ()),
      ($suffix ? (-suffix => $suffix) : ()),
    };
  }

  my $exports = $config->{groups}{$group_name};

  if (
    Params::Util::_CODELIKE($exports) ## no critic Private
    or
    Params::Util::_SCALAR0($exports)  ## no critic Private
  ) {
    # Group generators are not shown -prefix and -suffix; those are applied
    # to the generated names afterwards by _expand_groups.
    my %generator_arg = %$merge;
    delete @generator_arg{qw(-prefix -suffix)};

    my $generated = Params::Util::_CODELIKE($exports) ## no critic Private
      ? $exports->($class, $group_name, \%generator_arg, $collection)
      : $class->$$exports($group_name, \%generator_arg, $collection);

    Carp::croak qq(group generator "$group_name" did not return a hashref)
      if ref $generated ne 'HASH';

    my $pairs = [ map { [ $_ => $generated->{$_} ] } keys %$generated ];
    return @{
      _expand_groups($class, $config, $pairs, $collection, $seen, $merge)
    };
  }

  # A static group: canonicalize its export list and expand it recursively.
  $exports = Data::OptList::mkopt($exports, "$group_name exports");

  return @{
    _expand_groups($class, $config, $exports, $collection, $seen, $merge)
  };
}
128
+
129
# Build the closure that records collections for one import call.
#
#   $col - hashref that receives name => value for each collection
#   $etc - arrayref of [ $config, $import_args, $class, $into ]
#
# The returned coderef takes one [ name => value ] pair.  It croaks if the
# same collection is supplied twice, runs the collector's hook when one is
# configured (a coderef, or a reference to a method name that is called on
# $class), croaks if the hook returns false, and otherwise stores the value
# in $col.
sub _mk_collection_builder {
  my ($col, $etc) = @_;
  my ($config, $import_args, $class, $into) = @$etc;

  my %already_given;

  return sub {
    my ($pair) = @_;
    my ($name, $value) = @$pair;

    if ($already_given{$name}++) {
      Carp::croak "collection $name provided multiple times in import";
    }

    my $hook = $config->{collectors}{$name};
    if (ref $hook) {
      my %hook_arg = (
        name        => $name,
        config      => $config,
        import_args => $import_args,
        class       => $class,
        into        => $into,
      );

      my $passed = Params::Util::_SCALAR0($hook) ## no critic Private
        ? $class->$$hook($value, \%hook_arg)
        : $hook->($value, \%hook_arg);

      Carp::croak "collection $name failed validation" unless $passed;
    }

    return $col->{$name} = $value;
  };
}
161
+
162
# Pull the collections out of the (already canonicalized) import arguments.
#
# Every pair in @$import_args whose name is a configured collector is removed
# from the array in place and handed to the collection builder, which runs
# any hook and records the value.  When an INIT collector is configured, an
# empty INIT collection is processed before all others.
#
# Returns a hashref of collection name => value.
sub _collect_collections {
  my ($config, $import_args, $class, $into) = @_;

  # Scan from the end so that removing a pair never disturbs the index of a
  # pair that has not been examined yet.
  my @found;
  for my $idx (reverse 0 .. $#$import_args) {
    next unless exists $config->{collectors}{ $import_args->[$idx][0] };
    push @found, splice(@$import_args, $idx, 1);
  }

  if ($config->{collectors}{INIT}) {
    unshift @found, [ INIT => {} ];
  }

  my %collected;
  my $record = _mk_collection_builder(
    \%collected,
    [ $config, $import_args, $class, $into ],
  );
  $record->($_) for @found;

  return \%collected;
}
182
+
183
+
184
# Build an exporter from $config and install it into a package.
#
# Besides the ordinary exporter configuration, three keys are consumed here
# (and removed from $config):
#
#   as         - name under which to install the routine (default "import")
#   into       - package to install into
#   into_level - number of caller frames to go up to find that package
#
# "into" and "into_level" are mutually exclusive; with neither, the routine
# is installed into the immediate caller.
sub setup_exporter {
  my ($config) = @_;

  if (exists $config->{into} and exists $config->{into_level}) {
    Carp::croak 'into and into_level may not both be supplied to exporter';
  }

  my $as = delete $config->{as};
  $as = 'import' unless $as;

  # caller() must be evaluated here, in this sub's own frame, and in scalar
  # context so that it yields just the package name.
  my $into;
  if (exists $config->{into}) {
    $into = delete $config->{into};
  }
  elsif (exists $config->{into_level}) {
    $into = caller(delete $config->{into_level});
  }
  else {
    $into = caller(0);
  }

  my $exporter = build_exporter($config);

  return Sub::Install::reinstall_sub({
    code => $exporter,
    into => $into,
    as   => $as,
  });
}
204
+
205
+
206
# Return the keys that the two given hashrefs have in common (in list
# context), or how many there are (in scalar context).  The order follows
# the key order of the second hash.
sub _key_intersection {
  my ($x, $y) = @_;

  # keys %$x first, so that an undefined $x is treated as an empty hash.
  my @x_keys = keys %$x;
  my %in_x;
  @in_x{@x_keys} = (1) x @x_keys;

  return grep { $in_x{$_} } keys %$y;
}
211
+
212
+ # Given the config passed to setup_exporter, which contains sugary opt list
213
+ # data, rewrite the opt lists into hashes, catch a few kinds of invalid
214
+ # configurations, and set up defaults. Since the config is a reference, it's
215
+ # rewritten in place.
216
# The set of keys accepted in an exporter configuration hash.  It is filled
# in at compile time so that it is ready before any run-time code executes.
my %valid_config_key;
BEGIN {
  my @current    = qw(as collectors installer generator exports groups into into_level);
  my @deprecated = qw(exporter);

  @valid_config_key{ @current, @deprecated } = (1) x (@current + @deprecated);
}
223
+
224
# Croak if the collector configuration uses a reserved name other than INIT.
# Names made up solely of capital letters and underscores are reserved.
sub _assert_collector_names_ok {
  my ($collectors) = @_;

  for my $name (keys %$collectors) {
    next if $name eq 'INIT';
    next unless $name =~ /\A[_A-Z]+\z/;

    Carp::croak "unknown reserved collector name: $name";
  }
}
232
+
233
# Validate and normalize, in place, the configuration hashref given to
# build_exporter.
#
#   * croaks on unknown keys, on "into" combined with "into_level", on
#     reserved collector names, and on names used both as an export and as a
#     collector
#   * moves the deprecated "exporter" key to "installer" (with a warning)
#   * rewrites the "exports", "collectors" and "groups" opt lists into hashes
#   * fills in the "default" and "all" groups and the default generator and
#     installer when they were not supplied
sub _rewrite_build_config {
  my ($config) = @_;

  if (my @keys = grep { not exists $valid_config_key{$_} } keys %$config) {
    Carp::croak "unknown options (@keys) passed to Sub::Exporter";
  }

  # This only needs checking once: nothing below adds either key.
  Carp::croak q(into and into_level may not both be supplied to exporter)
    if exists $config->{into} and exists $config->{into_level};

  # XXX: Remove after deprecation period.
  if ($config->{exporter}) {
    Carp::cluck "'exporter' argument to build_exporter is deprecated. Use 'installer' instead; the semantics are identical.";
    $config->{installer} = delete $config->{exporter};
  }

  # Exports and collectors may each be followed by a coderef or by a
  # reference to a method name.
  for (qw(exports collectors)) {
    $config->{$_} = Data::OptList::mkopt_hash(
      $config->{$_},
      $_,
      [ 'CODE', 'SCALAR' ],
    );
  }

  _assert_collector_names_ok($config->{collectors});

  if (my @names = _key_intersection(@$config{qw(exports collectors)})) {
    Carp::croak "names (@names) used in both collections and exports";
  }

  $config->{groups} = Data::OptList::mkopt_hash(
    $config->{groups},
    'groups',
    [
      'HASH',   # standard opt list
      'ARRAY',  # standard opt list
      'CODE',   # group generator
      'SCALAR', # name of group generation method
    ]
  );

  # by default, export nothing
  $config->{groups}{default} ||= [];

  # by default, build an all-inclusive 'all' group
  $config->{groups}{all} ||= [ keys %{ $config->{exports} } ];

  $config->{generator} ||= \&default_generator;
  $config->{installer} ||= \&default_installer;
}
286
+
287
# Turn an exporter configuration into an import routine and return it as a
# coderef (nothing is installed).  $config is validated and normalized in
# place first.
#
# The returned routine is meant to be called as a class method.  Its first
# argument may be a hashref of special options (into, into_level, generator,
# installer) that override the same-named configuration entries for that one
# call; the remaining arguments are the import list.
sub build_exporter {
  my ($config) = @_;

  _rewrite_build_config($config);

  return sub {
    my $class = shift;

    my $special = ref($_[0]) ? shift(@_) : {};

    if (exists $special->{into} and exists $special->{into_level}) {
      Carp::croak q(into and into_level may not both be supplied to exporter);
    }

    if ($special->{exporter}) {
      Carp::cluck "'exporter' special import argument is deprecated. Use 'installer' instead; the semantics are identical.";
      $special->{installer} = delete $special->{exporter};
    }

    # Pick the target package: per-call options win over the configuration,
    # and the immediate caller is the fallback.  caller() has to be evaluated
    # right here, in the import routine's own frame and in scalar context.
    my $into;
    if (defined $special->{into}) {
      $into = delete $special->{into};
    }
    elsif (defined $special->{into_level}) {
      $into = caller(delete $special->{into_level});
    }
    elsif (defined $config->{into}) {
      $into = $config->{into};
    }
    elsif (defined $config->{into_level}) {
      $into = caller($config->{into_level});
    }
    else {
      $into = caller(0);
    }

    my $generator = delete $special->{generator};
    $generator ||= $config->{generator};

    my $installer = delete $special->{installer};
    $installer ||= $config->{installer};

    # An array of [ name => value_ref ] pairs.
    my $import_args = Data::OptList::mkopt([ @_ ]);

    # With nothing requested, ask for the default group.
    unless (@$import_args) {
      $import_args = [ [ -default => undef ] ];
    }

    # Collections are removed from the import list first; what is left is
    # expanded into the final list of things to generate and install.
    my $collection = _collect_collections($config, $import_args, $class, $into);
    my $to_import  = _expand_groups($class, $config, $import_args, $collection);

    my %context = (
      class     => $class,
      col       => $collection,
      config    => $config,
      into      => $into,
      generator => $generator,
      installer => $installer,
    );

    return _do_import(\%context, $to_import);
  };
}
341
+
342
# Generate the code for every requested import and hand the lot to the
# installer.
#
#   $arg       - hashref with class, col, config, into, generator, installer
#   $to_import - arrayref of [ name => value ] pairs, groups already expanded
#
# The value in a pair is either code supplied by a group generator (used as
# is, under the pair's name) or a hash of arguments for a configured export,
# from which -as is extracted to decide where the result is installed.
# Croaks when a name is not a configured export.  Returns whatever the
# installer returns.
sub _do_import {
  my ($arg, $to_import) = @_;

  my @install_list;

  for my $pair (@$to_import) {
    my ($name, $import_arg) = @$pair;

    my $as = $name;
    my $generator;

    if ($import_arg and Params::Util::_CODELIKE($import_arg)) { ## no critic
      # A group generator already built this routine; wrap it so that it can
      # go through the same generator interface as everything else.
      my $prebuilt = $import_arg;
      $generator = sub { $prebuilt };
    }
    else {
      # Copy the arguments so that removing -as does not touch the caller's
      # data.
      my %own_arg = $import_arg ? %$import_arg : ();

      unless (exists $arg->{config}{exports}{$name}) {
        Carp::croak qq("$name" is not exported by the $arg->{class} module);
      }

      $generator = $arg->{config}{exports}{$name};

      $as = delete $own_arg{-as} if exists $own_arg{-as};

      $import_arg = \%own_arg;
    }

    my %request = (
      class     => $arg->{class},
      name      => $name,
      arg       => $import_arg,
      col       => $arg->{col},
      generator => $generator,
    );
    my $code = $arg->{generator}->(\%request);

    push @install_list, $as, $code;
  }

  my %target = (
    class => $arg->{class},
    into  => $arg->{into},
    col   => $arg->{col},
  );

  return $arg->{installer}->(\%target, \@install_list);
}
389
+
390
+ ## Cute idea, possibly for future use: also supply an "unimport" for:
391
+ ## no Module::Whatever qw(arg arg arg);
392
+ # sub _unexport {
393
+ # my (undef, undef, undef, undef, undef, $as, $into) = @_;
394
+ #
395
+ # if (ref $as eq 'SCALAR') {
396
+ # undef $$as;
397
+ # } elsif (ref $as) {
398
+ # Carp::croak "invalid reference type for $as: " . ref $as;
399
+ # } else {
400
+ # no strict 'refs';
401
+ # delete &{$into . '::' . $as};
402
+ # }
403
+ # }
404
+
405
+
406
# The default generator callback: produce the code to install for one export.
#
# $arg is a hashref with the entries class, name, generator, arg and col.
#
#   * no generator: the routine called "name" is looked up on the class with
#     ->can; croaks when there is none
#   * code-like generator: called with (class, name, arg, col)
#   * anything else is taken to be a reference to a method name, and that
#     method is called on the class with (name, arg, col)
sub default_generator {
  my ($arg) = @_;

  my $class     = $arg->{class};
  my $name      = $arg->{name};
  my $generator = $arg->{generator};

  unless (defined $generator) {
    my $code = $class->can($name);
    Carp::croak "can't locate exported subroutine $name via $class"
      unless $code;
    return $code;
  }

  # The generator is invoked as a plain coderef rather than as a method so
  # that an object overloaded as code is used as code, not stringified.
  if (Params::Util::_CODELIKE($generator)) { ## no critic Private
    return $generator->($class, $name, $arg->{arg}, $arg->{col});
  }

  return $class->$$generator($name, $arg->{arg}, $arg->{col});
}
426
+
427
+
428
# The default installer callback.
#
#   $arg       - hashref; only its "into" entry (the target package) is used
#   $to_export - flat arrayref of (as, code) pairs
#
# When "as" is a scalar reference the code is stored in the referenced
# scalar; any other kind of reference is an error; a plain string is the
# name under which the code is installed into the target package.
sub default_installer {
  my ($arg, $to_export) = @_;

  # Consume a copy two items at a time, leaving the caller's list intact.
  my @pending = @$to_export;

  while (my ($as, $code) = splice @pending, 0, 2) {
    my $kind = ref $as;

    if ($kind eq 'SCALAR') {
      $$as = $code;
      next;
    }

    Carp::croak "invalid reference type for $as: " . ref $as if $kind;

    Sub::Install::reinstall_sub({
      code => $code,
      into => $arg->{into},
      as   => $as,
    });
  }
}
450
+
451
# Deprecated alias for default_installer: emits a warning with a stack trace
# and then transfers control to default_installer with the same arguments.
sub default_exporter {
  my $notice = "default_exporter is deprecated; call default_installer instead; the semantics are identical";
  Carp::cluck $notice;

  # goto keeps @_ and removes this frame from the call stack.
  goto &default_installer;
}
455
+
456
+
457
# Sub::Exporter sets up its own import routine with the very machinery it
# provides.  The -setup collector is what makes
# "use Sub::Exporter -setup => ..." work: its hook (_setup) queues an import
# of _import, whose generator builds an exporter from the supplied
# configuration.
setup_exporter({
  exports => [
    qw(setup_exporter build_exporter),
    _import => sub { build_exporter($_[2]) },
  ],
  groups  => {
    # Every name in a group must be a configured export, or importing the
    # group croaks; the routine is build_exporter, not "build_export".
    all   => [ qw(setup_exporter build_exporter) ],
  },
  collectors => { -setup => \&_setup },
});
467
+
468
# Hook for the -setup collector.
#
#   $value - the value given for -setup: a configuration hashref, or an
#            arrayref that is taken to be the list of exports
#   $arg   - the collector hook argument; its import_args list is appended to
#
# Queues an import of _import, to be installed as "import" unless the
# configuration hash supplies its own -as.  Returns true on success and
# nothing (a validation failure) for any other kind of value.
sub _setup {
  my ($value, $arg) = @_;

  my $kind = ref $value;
  my $import_config;

  if ($kind eq 'HASH') {
    $import_config = { -as => 'import', %$value };
  }
  elsif ($kind eq 'ARRAY') {
    $import_config = { -as => 'import', exports => $value };
  }
  else {
    return;
  }

  push @{ $arg->{import_args} }, [ _import => $import_config ];
  return 1;
}
481
+
482
+
483
+
484
+ "jn8:32"; # <-- magic true value
485
+
486
+ __END__
487
+ =pod
488
+
489
+ =head1 NAME
490
+
491
+ Sub::Exporter - a sophisticated exporter for custom-built routines
492
+
493
+ =head1 VERSION
494
+
495
+ version 0.984
496
+
497
+ =head1 SYNOPSIS
498
+
499
+ Sub::Exporter must be used in two places. First, in an exporting module:
500
+
501
+ # in the exporting module:
502
+ package Text::Tweaker;
503
+ use Sub::Exporter -setup => {
504
+ exports => [
505
+ qw(squish titlecase), # always works the same way
506
+ reformat => \&build_reformatter, # generator to build exported function
507
+ trim => \&build_trimmer,
508
+ indent => \&build_indenter,
509
+ ],
510
+ collectors => [ 'defaults' ],
511
+ };
512
+
513
+ Then, in an importing module:
514
+
515
+ # in the importing module:
516
+ use Text::Tweaker
517
+ 'squish',
518
+ indent => { margin => 5 },
519
+ reformat => { width => 79, justify => 'full', -as => 'prettify_text' },
520
+ defaults => { eol => 'CRLF' };
521
+
522
+ With this setup, the importing module ends up with three routines: C<squish>,
523
+ C<indent>, and C<prettify_text>. The latter two have been built to the
524
+ specifications of the importer -- they are not just copies of the code in the
525
+ exporting package.
526
+
527
+ =head1 DESCRIPTION
528
+
529
+ B<ACHTUNG!> If you're not familiar with Exporter or exporting, read
530
+ L<Sub::Exporter::Tutorial> first!
531
+
532
+ =head2 Why Generators?
533
+
534
+ The biggest benefit of Sub::Exporter over existing exporters (including the
535
+ ubiquitous Exporter.pm) is its ability to build new coderefs for export, rather
536
+ than to simply export code identical to that found in the exporting package.
537
+
538
+ If your module's consumers get a routine that works like this:
539
+
540
+ use Data::Analyze qw(analyze);
541
+ my $value = analyze($data, $tolerance, $passes);
542
+
543
+ and they constantly pass only one or two different set of values for the
544
+ non-C<$data> arguments, your code can benefit from Sub::Exporter. By writing a
545
+ simple generator, you can let them do this, instead:
546
+
547
+ use Data::Analyze
548
+ analyze => { tolerance => 0.10, passes => 10, -as => 'analyze10' },
549
+ analyze => { tolerance => 0.15, passes => 50, -as => 'analyze50' };
550
+
551
+ my $value = analyze10($data);
552
+
553
+ The generator for that would look something like this:
554
+
555
+ sub build_analyzer {
556
+ my ($class, $name, $arg) = @_;
557
+
558
+ return sub {
559
+ my $data = shift;
560
+ my $tolerance = shift || $arg->{tolerance};
561
+ my $passes = shift || $arg->{passes};
562
+
563
+ analyze($data, $tolerance, $passes);
564
+ }
565
+ }
566
+
567
+ Your module's user now has to do less work to benefit from it -- and remember,
568
+ you're often your own user! Investing in customized subroutines is an
569
+ investment in future laziness.
570
+
571
+ This also avoids a common form of ugliness seen in many modules: package-level
572
+ configuration. That is, you might have seen something like the above
573
+ implemented like so:
574
+
575
+ use Data::Analyze qw(analyze);
576
+ $Data::Analyze::default_tolerance = 0.10;
577
+ $Data::Analyze::default_passes = 10;
578
+
579
+ This might save time, until you have multiple modules using Data::Analyze.
580
+ Because there is only one global configuration, they step on each other's toes
581
+ and your code begins to have mysterious errors.
582
+
583
+ Generators can also allow you to export class methods to be called as
584
+ subroutines:
585
+
586
+ package Data::Methodical;
587
+ use Sub::Exporter -setup => { exports => { some_method => \&_curry_class } };
588
+
589
+ sub _curry_class {
590
+ my ($class, $name) = @_;
591
+ sub { $class->$name(@_); };
592
+ }
593
+
594
+ Because of the way that exporters and Sub::Exporter work, any package that
595
+ inherits from Data::Methodical can inherit its exporter and override its
596
+ C<some_method>. If a user imports C<some_method> from that package, he'll
597
+ receive a subroutine that calls the method on the subclass, rather than on
598
+ Data::Methodical itself.
599
+
600
+ =head2 Other Customizations
601
+
602
+ Building custom routines with generators isn't the only way that Sub::Exporter
603
+ allows the importing code to refine its use of the exported routines. They may
604
+ also be renamed to avoid naming collisions.
605
+
606
+ Consider the following code:
607
+
608
+ # this program determines to which circle of Hell you will be condemned
609
+ use Morality qw(sin virtue); # for calculating viciousness
610
+ use Math::Trig qw(:all); # for dealing with circles
611
+
612
+ The programmer has inadvertently imported two C<sin> routines. The solution,
613
+ in Exporter.pm-based modules, would be to import only one and then call the
614
+ other by its fully-qualified name. Alternately, the importer could write a
615
+ routine that did so, or could mess about with typeglobs.
616
+
617
+ How much easier to write:
618
+
619
+ # this program determines to which circle of Hell you will be condemned
620
+ use Morality qw(virtue), sin => { -as => 'offense' };
621
+ use Math::Trig -all => { -prefix => 'trig_' };
622
+
623
+ and to have at one's disposal C<offense> and C<trig_sin> -- not to mention
624
+ C<trig_cos> and C<trig_tan>.
625
+
626
+ =head1 EXPORTER CONFIGURATION
627
+
628
+ You can configure an exporter for your package by using Sub::Exporter like so:
629
+
630
+ package Tools;
631
+ use Sub::Exporter
632
+ -setup => { exports => [ qw(function1 function2 function3) ] };
633
+
634
+ This is the simplest way to use the exporter, and is basically equivalent to
635
+ this:
636
+
637
+ package Tools;
638
+ use base qw(Exporter);
639
+ our @EXPORT_OK = qw(function1 function2 function3);
640
+
641
+ Any basic use of Sub::Exporter will look like this:
642
+
643
+ package Tools;
644
+ use Sub::Exporter -setup => \%config;
645
+
646
+ The following keys are valid in C<%config>:
647
+
648
+ exports - a list of routines to provide for exporting; each routine may be
649
+ followed by a generator
650
+ groups - a list of groups to provide for exporting; each must be followed by
651
+ either (a) a list of exports, possibly with arguments for each
652
+ export, or (b) a generator
653
+
654
+ collectors - a list of names into which values are collected for use in
655
+ routine generation; each name may be followed by a validator
656
+
657
+ In addition to the basic options above, a few more advanced options may be
658
+ passed:
659
+
660
+ into_level - how far up the caller stack to look for a target (default 0)
661
+ into - an explicit target (package) into which to export routines
662
+
663
+ In other words: Sub::Exporter installs a C<import> routine which, when called,
664
+ exports routines to the calling namespace. The C<into> and C<into_level>
665
+ options change where those exported routines are installed.
666
+
667
+ generator - a callback used to produce the code that will be installed
668
+ default: Sub::Exporter::default_generator
669
+
670
+ installer - a callback used to install the code produced by the generator
671
+ default: Sub::Exporter::default_installer
672
+
673
+ For information on how these callbacks are used, see the documentation for
674
+ C<L</default_generator>> and C<L</default_installer>>.
675
+
676
+ =head2 Export Configuration
677
+
678
+ The C<exports> list may be provided as an array reference or a hash reference.
679
+ The list is processed in such a way that the following are equivalent:
680
+
681
+ { exports => [ qw(foo bar baz), quux => \&quux_generator ] }
682
+
683
+ { exports =>
684
+ { foo => undef, bar => undef, baz => undef, quux => \&quux_generator } }
685
+
686
+ Generators are code that return coderefs. They are called with four
687
+ parameters:
688
+
689
+ $class - the class whose exporter has been called (the exporting class)
690
+ $name - the name of the export for which the routine is being built
691
+ \%arg - the arguments passed for this export
692
+ \%col - the collections for this import
693
+
694
+ Given the configuration in the L</SYNOPSIS>, the following C<use> statement:
695
+
696
+ use Text::Tweaker
697
+ reformat => { -as => 'make_narrow', width => 33 },
698
+ defaults => { eol => 'CR' };
699
+
700
+ would result in the following call to C<&build_reformatter>:
701
+
702
+ my $code = build_reformatter(
703
+ 'Text::Tweaker',
704
+ 'reformat',
705
+ { width => 33 }, # note that -as is not passed in
706
+ { defaults => { eol => 'CR' } },
707
+ );
708
+
709
+ The returned coderef (C<$code>) would then be installed as C<make_narrow> in the
710
+ calling package.
711
+
712
+ Instead of providing a coderef in the configuration, a reference to a method
713
+ name may be provided. This method will then be called on the invocant of the
714
+ C<import> method. (In this case, we do not pass the C<$class> parameter, as it
715
+ would be redundant.)
716
+
717
+ =head2 Group Configuration
718
+
719
+ The C<groups> list can be passed in the same forms as C<exports>. Groups must
720
+ have values to be meaningful, which may either list exports that make up the
721
+ group (optionally with arguments) or may provide a way to build the group.
722
+
723
+ The simpler case is the first: a group definition is a list of exports. Here's
724
+ the example that could go in the exporter in the L</SYNOPSIS>.
725
+
726
+ groups => {
727
+ default => [ qw(reformat) ],
728
+ shorteners => [ qw(squish trim) ],
729
+ email_safe => [
730
+ 'indent',
731
+ reformat => { -as => 'email_format', width => 72 }
732
+ ],
733
+ },
734
+
735
+ Groups are imported by specifying their name prefixed by either a dash or a
736
+ colon. This line of code would import the C<shorteners> group:
737
+
738
+ use Text::Tweaker qw(-shorteners);
739
+
740
+ Arguments passed to a group when importing are merged into the group's options
741
+ and passed to any relevant generators. Groups can contain other groups, but
742
+ looping group structures are ignored.
743
+
744
+ The other possible value for a group definition, a coderef, allows one
745
+ generator to build several exportable routines simultaneously. This is useful
746
+ when many routines must share enclosed lexical variables. The coderef must
747
+ return a hash reference. The keys will be used as export names and the values
748
+ are the subs that will be exported.
749
+
750
+ This example shows a simple use of the group generator.
751
+
752
+ package Data::Crypto;
753
+ use Sub::Exporter -setup => { groups => { cipher => \&build_cipher_group } };
754
+
755
+ sub build_cipher_group {
756
+ my ($class, $group, $arg) = @_;
757
+ my ($encode, $decode) = build_codec($arg->{secret});
758
+ return { cipher => $encode, decipher => $decode };
759
+ }
760
+
761
+ The C<cipher> and C<decipher> routines are built in a group because they are
762
+ built together by code which encloses their secret in their environment.
763
+
764
+ =head3 Default Groups
765
+
766
+ If a module that uses Sub::Exporter is C<use>d with no arguments, it will try
767
+ to export the group named C<default>. If that group has not been specifically
768
+ configured, it will be empty, and nothing will happen.
769
+
770
+ Another group is also created if not defined: C<all>. The C<all> group
771
+ contains all the exports from the exports list.
772
+
773
+ =head2 Collector Configuration
774
+
775
+ The C<collectors> entry in the exporter configuration gives names which, when
776
+ found in the import call, have their values collected and passed to every
777
+ generator.
778
+
779
+ For example, the C<build_analyzer> generator that we saw above could be
780
+ rewritten as:
781
+
782
+ sub build_analyzer {
783
+ my ($class, $name, $arg, $col) = @_;
784
+
785
+ return sub {
786
+ my $data = shift;
787
+ my $tolerance = shift || $arg->{tolerance} || $col->{defaults}{tolerance};
788
+ my $passes = shift || $arg->{passes} || $col->{defaults}{passes};
789
+
790
+ analyze($data, $tolerance, $passes);
791
+ }
792
+ }
793
+
794
+ That would allow the importer to specify global defaults for its imports:
795
+
796
+ use Data::Analyze
797
+ 'analyze',
798
+ analyze => { tolerance => 0.10, -as => 'analyze10' },
799
+ analyze => { tolerance => 0.15, passes => 50, -as => 'analyze50' },
800
+ defaults => { passes => 10 };
801
+
802
+ my $A = analyze10($data); # equivalent to analyze($data, 0.10, 10);
803
+ my $C = analyze50($data); # equivalent to analyze($data, 0.15, 50);
804
+ my $B = analyze($data, 0.20); # equivalent to analyze($data, 0.20, 10);
805
+
806
+ If values are provided in the C<collectors> list during exporter setup, they
807
+ must be code references, and are used to validate the importer's values. The
808
+ validator is called when the collection is found, and if it returns false, an
809
+ exception is thrown. We could ensure that no one tries to set a global data
810
+ default easily:
811
+
812
+ collectors => { defaults => sub { return (exists $_[0]->{data}) ? 0 : 1 } }
813
+
814
+ Collector coderefs can also be used as hooks to perform arbitrary actions
815
+ before anything is exported.
816
+
817
+ When the coderef is called, it is passed the value of the collection and a
818
+ hashref containing the following entries:
819
+
820
+ name - the name of the collector
821
+ config - the exporter configuration (hashref)
822
+ import_args - the arguments passed to the exporter, sans collections (aref)
823
+ class - the package on which the importer was called
824
+ into - the package into which exports will be exported
825
+
826
+ Collectors with all-caps names (that is, made up of underscore or capital A
827
+ through Z) are reserved for special use. The only currently implemented
828
+ special collector is C<INIT>, whose hook (if present in the exporter
829
+ configuration) is always run before any other hook.
830
+
831
+ =head1 CALLING THE EXPORTER
832
+
833
+ Arguments to the exporter (that is, the arguments after the module name in a
834
+ C<use> statement) are parsed as follows:
835
+
836
+ First, the collectors gather any collections found in the arguments. Any
837
+ reference type may be given as the value for a collector. For each collection
838
+ given in the arguments, its validator (if any) is called.
839
+
840
+ Next, groups are expanded. If the group is implemented by a group generator,
841
+ the generator is called. There are two special arguments which, if given to a
842
+ group, have special meaning:
843
+
844
+ -prefix - a string to prepend to any export imported from this group
845
+ -suffix - a string to append to any export imported from this group
846
+
847
+ Finally, individual export generators are called and all subs, generated or
848
+ otherwise, are installed in the calling package. There is only one special
849
+ argument for export generators:
850
+
851
+ -as - where to install the exported sub
852
+
853
+ Normally, C<-as> will contain an alternate name for the routine. It may,
854
+ however, contain a reference to a scalar. If that is the case, a reference to the
855
+ generated routine will be placed in the scalar referenced by C<-as>. It will
856
+ not be installed into the calling package.
857
+
858
+ =head2 Special Exporter Arguments
859
+
860
+ The generated exporter accepts some special options, which may be passed as the
861
+ first argument, in a hashref.
862
+
863
+ These options are:
864
+
865
+ into_level
866
+ into
867
+ generator
868
+ installer
869
+
870
+ These override the same-named configuration options described in L</EXPORTER
871
+ CONFIGURATION>.
872
+
873
+ =head1 SUBROUTINES
874
+
875
+ =head2 setup_exporter
876
+
877
+ This routine builds and installs an C<import> routine. It is called with one
878
+ argument, a hashref containing the exporter configuration. Using this, it
879
+ builds an exporter and installs it into the calling package with the name
880
+ "import." In addition to the normal exporter configuration, a few named
881
+ arguments may be passed in the hashref:
882
+
883
+ into - into what package should the exporter be installed
884
+ into_level - into what level up the stack should the exporter be installed
885
+ as - what name should the installed exporter be given
886
+
887
+ By default the exporter is installed with the name C<import> into the immediate
888
+ caller of C<setup_exporter>. In other words, if your package calls
889
+ C<setup_exporter> without providing any of the three above arguments, it will
890
+ have an C<import> routine installed.
891
+
892
+ Providing both C<into> and C<into_level> will cause an exception to be thrown.
893
+
894
+ The exporter is built by C<L</build_exporter>>.
895
+
896
+ =head2 build_exporter
897
+
898
+ Given a standard exporter configuration, this routine builds and returns an
899
+ exporter -- that is, a subroutine that can be installed as a class method to
900
+ perform exporting on request.
901
+
902
+ Usually, this method is called by C<L</setup_exporter>>, which then installs
903
+ the exporter as a package's import routine.
904
+
905
+ =head2 default_generator
906
+
907
+ This is Sub::Exporter's default generator. It takes bits of configuration that
908
+ have been gathered during the import and turns them into a coderef that can be
909
+ installed.
910
+
911
+ my $code = default_generator(\%arg);
912
+
913
+ Passed arguments are:
914
+
915
+ class - the class on which the import method was called
916
+ name - the name of the export being generated
917
+ arg - the arguments to the generator
918
+ col - the collections
919
+
920
+ generator - the generator to be used to build the export (code or scalar ref)
921
+
922
+ =head2 default_installer
923
+
924
+ This is Sub::Exporter's default installer. It does what Sub::Exporter
925
+ promises: it installs code into the target package.
926
+
927
+ default_installer(\%arg, \@to_export);
928
+
929
+ Passed arguments are:
930
+
931
+ into - the package into which exports should be delivered
932
+
933
+ C<@to_export> is a list of name/value pairs. The default installer assigns code
934
+ (the values) to named slots (the names) in the given package. If the name is a
935
+ scalar reference, the scalar reference is made to point to the code reference
936
+ instead.
937
+
938
+ =head1 EXPORTS
939
+
940
+ Sub::Exporter also offers its own exports: the C<setup_exporter> and
941
+ C<build_exporter> routines described above. It also provides a special "setup"
942
+ collector, which will set up an exporter using the parameters passed to it.
943
+
944
+ Note that the "setup" collector (seen in examples like the L</SYNOPSIS> above)
945
+ uses C<build_exporter>, not C<setup_exporter>. This means that the special
946
+ arguments like "into" and "as" for C<setup_exporter> are not accepted here.
947
+ Instead, you may write something like:
948
+
949
+ use Sub::Exporter
950
+ { into => 'Target::Package' },
951
+ -setup => {
952
+ -as => 'do_import',
953
+ exports => [ ... ],
954
+ }
955
+ ;
956
+
957
+ Finding a good reason for wanting to do this is left as an exercise for the
958
+ reader.
959
+
960
+ =head1 COMPARISONS
961
+
962
+ There are a whole mess of exporters on the CPAN. The features included in
963
+ Sub::Exporter set it apart from any existing Exporter. Here's a summary of
964
+ some other exporters and how they compare.
965
+
966
+ =over
967
+
968
+ =item * L<Exporter> and co.
969
+
970
+ This is the standard Perl exporter. Its interface is a little clunky, but it's
971
+ fast and ubiquitous. It can do some things that Sub::Exporter can't: it can
972
+ export things other than routines, it can import "everything in this group
973
+ except this symbol," and some other more esoteric things. These features seem
974
+ to go nearly entirely unused.
975
+
976
+ It always exports things exactly as they appear in the exporting module; it
977
+ can't rename or customize routines. Its groups ("tags") can't be nested.
978
+
979
+ L<Exporter::Lite> is a whole lot like Exporter, but it does significantly less:
980
+ it supports exporting symbols, but not groups, pattern matching, or negation.
981
+
982
+ The fact that Sub::Exporter can't export symbols other than subroutines is
983
+ a good idea, not a missing feature.
984
+
985
+ For simple uses, setting up Sub::Exporter is about as easy as Exporter. For
986
+ complex uses, Sub::Exporter makes hard things possible, which would not be
987
+ possible with Exporter.
988
+
989
+ When using a module that uses Sub::Exporter, users familiar with Exporter will
990
+ probably see no difference in the basics. These two lines do about the same
991
+ thing whether the exporting module uses Exporter or Sub::Exporter.
992
+
993
+ use Some::Module qw(foo bar baz);
994
+ use Some::Module qw(foo :bar baz);
995
+
996
+ The definition for exporting in Exporter.pm might look like this:
997
+
998
+ package Some::Module;
999
+ use base qw(Exporter);
1000
+ our @EXPORT_OK = qw(foo bar baz quux);
1001
+ our %EXPORT_TAGS = (bar => [ qw(bar baz) ]);
1002
+
1003
+ Using Sub::Exporter, it would look like this:
1004
+
1005
+ package Some::Module;
1006
+ use Sub::Exporter -setup => {
1007
+ exports => [ qw(foo bar baz quux) ],
1008
+ groups => { bar => [ qw(bar baz) ]}
1009
+ };
1010
+
1011
+ Sub::Exporter respects inheritance, so that a package may export inherited
1012
+ routines, and will export the most inherited version. Exporting methods
1013
+ without currying away the invocant is a bad idea, but Sub::Exporter allows you
1014
+ to do just that -- and anyway, there are other uses for this feature, like
1015
+ packages of exported subroutines which use inheritance specifically to allow
1016
+ more specialized, but similar, packages.
1017
+
1018
+ L<Exporter::Easy> provides a wrapper around the standard Exporter. It makes it
1019
+ simpler to build groups, but doesn't provide any more functionality. Because
1020
+ it is a front-end to Exporter, it will store your exporter's configuration in
1021
+ global package variables.
1022
+
1023
+ =item * Attribute-Based Exporters
1024
+
1025
+ Some exporters use attributes to mark variables to export. L<Exporter::Simple>
1026
+ supports exporting any kind of symbol, and supports groups. Using a module
1027
+ like Exporter or Sub::Exporter, it's easy to look at one place and see what is
1028
+ exported, but it's impossible to look at a variable definition and see whether
1029
+ it is exported by that alone. Exporter::Simple makes this trade in reverse:
1030
+ each variable's declaration includes its export definition, but there is no one
1031
+ place to look to find a manifest of exports.
1032
+
1033
+ More importantly, Exporter::Simple does not add any new features to those of
1034
+ Exporter. In fact, like Exporter::Easy, it is just a front-end to Exporter, so
1035
+ it ends up storing its configuration in global package variables. (This means
1036
+ that there is one place to look for your exporter's manifest, actually. You
1037
+ can inspect the C<@EXPORT> package variables, and other related package
1038
+ variables, at runtime.)
1039
+
1040
+ L<Perl6::Export> isn't actually attribute based, but looks similar. Its syntax
1041
+ is borrowed from Perl 6, and implemented by a source filter. It is a prototype
1042
+ of an interface that is still being designed. It should probably be avoided
1043
+ for production work. On the other hand, L<Perl6::Export::Attrs> implements
1044
+ Perl 6-like exporting, but translates it into Perl 5 by providing attributes.
1045
+
1046
+ =item * Other Exporters
1047
+
1048
+ L<Exporter::Renaming> wraps the standard Exporter to allow it to export symbols
1049
+ with changed names.
1050
+
1051
+ L<Class::Exporter> performs a special kind of routine generation, giving each
1052
+ importing package an instance of your class, and then exporting the instance's
1053
+ methods as normal routines. (Sub::Exporter, of course, can easily emulate this
1054
+ behavior, as shown above.)
1055
+
1056
+ L<Exporter::Tidy> implements a form of renaming (using its C<_map> argument)
1057
+ and of prefixing, and implements groups. It also avoids using package
1058
+ variables for its configuration.
1059
+
1060
+ =back
1061
+
1062
+ =head1 TODO
1063
+
1064
+ =over
1065
+
1066
+ =item * write a set of longer, more demonstrative examples
1067
+
1068
+ =item * solidify the "custom exporter" interface (see C<&default_exporter>)
1069
+
1070
+ =item * add an "always" group
1071
+
1072
+ =back
1073
+
1074
+ =head1 THANKS
1075
+
1076
+ Hans Dieter Pearcey provided helpful advice while I was writing Sub::Exporter.
1077
+ Ian Langworth and Shawn Sorichetti asked some good questions and helped me
1078
+ improve my documentation quite a bit. Yuval Kogman helped me find a bunch of
1079
+ little problems.
1080
+
1081
+ Thanks, guys!
1082
+
1083
+ =head1 BUGS
1084
+
1085
+ Please report any bugs or feature requests through the web interface at
1086
+ L<http://rt.cpan.org>. I will be notified, and then you'll automatically be
1087
+ notified of progress on your bug as I make changes.
1088
+
1089
+ =head1 AUTHOR
1090
+
1091
+ Ricardo Signes <rjbs@cpan.org>
1092
+
1093
+ =head1 COPYRIGHT AND LICENSE
1094
+
1095
+ This software is copyright (c) 2007 by Ricardo Signes.
1096
+
1097
+ This is free software; you can redistribute it and/or modify it under
1098
+ the same terms as the Perl 5 programming language system itself.
1099
+
1100
+ =cut
1101
+