opener-tokenizer-base 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +148 -0
- data/bin/tokenizer-base +5 -0
- data/bin/tokenizer-de +5 -0
- data/bin/tokenizer-en +5 -0
- data/bin/tokenizer-es +5 -0
- data/bin/tokenizer-fr +5 -0
- data/bin/tokenizer-it +5 -0
- data/bin/tokenizer-nl +5 -0
- data/core/lib/Data/OptList.pm +256 -0
- data/core/lib/Params/Util.pm +866 -0
- data/core/lib/Sub/Exporter.pm +1101 -0
- data/core/lib/Sub/Exporter/Cookbook.pod +309 -0
- data/core/lib/Sub/Exporter/Tutorial.pod +280 -0
- data/core/lib/Sub/Exporter/Util.pm +354 -0
- data/core/lib/Sub/Install.pm +329 -0
- data/core/lib/Time/Stamp.pm +808 -0
- data/core/load-prefixes.pl +43 -0
- data/core/nonbreaking_prefixes/abbreviation_list.kaf +0 -0
- data/core/nonbreaking_prefixes/abbreviation_list.txt +444 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.ca +533 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.de +781 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.el +448 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.en +564 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.es +758 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.fr +1027 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.is +697 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.it +641 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.nl +739 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.pl +729 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.pt +656 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.ro +484 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.ru +705 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.sk +920 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.sl +524 -0
- data/core/nonbreaking_prefixes/nonbreaking_prefix.sv +492 -0
- data/core/split-sentences.pl +114 -0
- data/core/text-fixer.pl +169 -0
- data/core/tokenizer-cli.pl +363 -0
- data/core/tokenizer.pl +145 -0
- data/lib/opener/tokenizers/base.rb +84 -0
- data/lib/opener/tokenizers/base/version.rb +8 -0
- data/opener-tokenizer-base.gemspec +25 -0
- metadata +134 -0
@@ -0,0 +1,808 @@
|
|
1
|
+
# vim: set sw=2 sts=2 ts=2 expandtab smarttab:
|
2
|
+
#
|
3
|
+
# This file is part of Time-Stamp
|
4
|
+
#
|
5
|
+
# This software is copyright (c) 2011 by Randy Stauner.
|
6
|
+
#
|
7
|
+
# This is free software; you can redistribute it and/or modify it under
|
8
|
+
# the same terms as the Perl 5 programming language system itself.
|
9
|
+
#
|
10
|
+
use strict;
|
11
|
+
use warnings;
|
12
|
+
|
13
|
+
package Time::Stamp;
|
14
|
+
{
|
15
|
+
$Time::Stamp::VERSION = '1.200';
|
16
|
+
}
|
17
|
+
BEGIN {
|
18
|
+
$Time::Stamp::AUTHORITY = 'cpan:RWSTAUNER';
|
19
|
+
}
|
20
|
+
# ABSTRACT: Easy, readable, efficient timestamp functions
|
21
|
+
|
22
|
+
# TODO: use collector?
|
23
|
+
|
24
|
+
use Sub::Exporter 0.982 -setup => {
|
25
|
+
-as => 'do_import',
|
26
|
+
exports => [
|
27
|
+
localstamp => \'_build_localstamp',
|
28
|
+
gmstamp => \'_build_gmstamp',
|
29
|
+
parsegm => \'_build_parsestamp',
|
30
|
+
parselocal => \'_build_parsestamp',
|
31
|
+
],
|
32
|
+
groups => [
|
33
|
+
stamps => [qw(localstamp gmstamp)],
|
34
|
+
parsers => [qw(parselocal parsegm)],
|
35
|
+
]
|
36
|
+
};
|
37
|
+
|
38
|
+
# Custom import wrapper: expands shortcut arguments such as "local-compact-ms"
# into the (name => \%options) pairs understood by the importer generated
# by Sub::Exporter (installed as do_import), then hands control over to it.
sub import {
  my @expanded;

  foreach my $item (@_) {
    # anything that does not look like a shortcut is passed through untouched
    unless( $item =~ /(local|gm)(?:stamp)?((?:-\w+)+)/ ){
      push @expanded, $item;
      next;
    }

    my ($which, $flags) = ($1, $2);

    # "us"/"ms" become boolean options; any other word names the format
    # (if several format words are given the last one wins)
    my %opt;
    foreach my $flag ( grep { $_ } split(/-/, $flags) ){
      if( $flag =~ /^([um]s)$/ ){
        $opt{$1} = 1;
      }
      else {
        $opt{format} = $flag;
      }
    }

    push @expanded, $which . 'stamp' => \%opt;
  }

  @_ = @expanded;
  # goto &sub replaces this frame, so the importer sees the original caller
  goto &do_import;
}
|
53
|
+
|
54
|
+
# set up named formats with default values
|
55
|
+
# Table of named formats: each name maps to a hashref holding the separator
# and time zone strings used when building the sprintf() pattern.
my $formats = do {
  # should we offer { prefix => '', suffix => '' } ? is that really useful?
  # the stamps are easy enough to parse as is (the whole point of this module)
  my @keys = qw(date_sep dt_sep time_sep tz_sep tz);

  my %default;
  @default{@keys} = ('-', 'T', ':', '', '');  # dt_sep 'T' is ISO 8601

  # same keys, every value an empty string
  my %blank;
  @blank{@keys} = ('') x @keys;

  my %named = (
    default => { %default },
    easy    => { %default, dt_sep => ' ', tz_sep => ' ' },  # easier to read
    numeric => { %blank },
    compact => { %blank, dt_sep => '_' },                   # visual separation
  );

  # aliases: these names point at the very same hashref as 'default'
  @named{qw(iso8601 rfc3339 w3cdtf)} = ($named{default}) x 3;

  \%named;
};
|
79
|
+
|
80
|
+
# we could offer a separate format_time_array() but currently
|
81
|
+
# I think the gain would be less than the cost of the extra function call:
|
82
|
+
# sub _build { return sub { format_time_array($arg, @_ or localtime) }; }
|
83
|
+
# sub format_time_array { sprintf(_format(shift), _ymdhms(@_)) }
|
84
|
+
|
85
|
+
# Sub::Exporter generator for localstamp: returns a stamp function that is
# built around CORE::localtime using the options supplied at import.
sub _build_localstamp {
  # generators receive ($class, $name, $arg, $col); only $arg is needed here
  my $arg = $_[2];

  return _generate_code(local => $arg);
}
|
91
|
+
|
92
|
+
# Sub::Exporter generator for gmstamp: returns a stamp function that is
# built around CORE::gmtime using the options supplied at import.
sub _build_gmstamp {
  # generators receive ($class, $name, $arg, $col); only $arg is needed here
  my $arg = $_[2];

  # UTC stamps get a 'Z' (Zulu) time zone designator by default;
  # an explicit tz in $arg still wins, and the numeric format is left alone
  my $is_numeric = $arg->{format} && $arg->{format} eq 'numeric';
  $arg = { tz => 'Z', %$arg }
    if !$is_numeric;

  return _generate_code(gm => $arg);
}
|
102
|
+
|
103
|
+
# TODO: could these subs be faster with a no_args option? would only save 2 if's
|
104
|
+
# Build the stamp function for the requested flavor by filling in and
# compiling a small code template.
#   $which - 'local' or 'gm'; selects CORE::localtime or CORE::gmtime
#   $arg   - hashref of options (format name, separators, tz, frac/ms/us)
# Returns a code ref that accepts no arguments (meaning "now"), a single
# epoch value, or a localtime()/gmtime() style list, and returns the stamp.
# Dies if the generated code fails to compile.
sub _generate_code {
  my ($which, $arg) = @_;
  # work on a shallow copy so the hash passed in is not modified
  $arg = { %$arg };
  # note: mu is 03BC
  $arg->{frac} ||= $arg->{us} ? 6 : $arg->{ms} ? 3 : 0;

  # the generated sub refers to $format by name (it closes over this lexical)
  my $format = _format($arg);

  my $code;
  my $vars = {
    which => $which,
  };
  if( $arg->{frac} ){
    $vars->{frac} = $arg->{frac};
    # always display a fraction if requested
    $vars->{gettime} = _have_hires()
      # gettimeofday() returns (seconds, microseconds) and the microseconds
      # are a plain integer: zero-pad to 6 digits so they can be used as the
      # digits after the decimal point (5123 must become .005123, not .5123)
      ? 'do { my @hr = Time::HiRes::gettimeofday(); ($hr[0], sprintf(q[%06d], $hr[1])) }'
      # if HiRes fails to load use whole number precision
      : '(CORE::time(), 0)';
    $code = <<'CODE';
      sub {
        # localtime() will not preserve the fraction, so separate it
        my ($t, $f) = @_ ? (split(/\./, $_[0]), 0) : {{gettime}};
        my @lt = _ymdhms(@_ > 1 ? @_ : CORE::{{which}}time($t));

        # use %.6f for precision, but strip leading zero
        return sprintf($format, @lt, substr(sprintf('%.{{frac}}f', '.'.$f), 1));
      };
CODE
  }
  # if not using fraction return a more efficient sub
  else {
    $code = <<'CODE';
      sub {
        return sprintf($format,
          _ymdhms(@_ > 1 ? @_ : CORE::{{which}}time(@_ ? $_[0] : time))
        );
      };
CODE
  }
  # poor man's template (easier than sprintf or escaping sigils)
  $code =~ s/\{\{(\w+)\}\}/$vars->{$1}/g;

  # the template is built only from values prepared above
  return do { eval $code or die $@ }; ## no critic (StringyEval)
}
|
149
|
+
|
150
|
+
# Sub::Exporter generator for parsegm/parselocal.
#   $name - export name; 'parsegm' selects Time::Local::timegm,
#           anything else selects Time::Local::timelocal
#   $arg  - hashref of options; 'regexp' replaces the default pattern
# Returns a code ref taking a stamp string.  In scalar context it returns
# the epoch seconds (with any fraction appended); in list context it returns
# (sec, min, hour, mday, mon, year) as accepted by Time::Local, with any
# fraction appended to the seconds.  If the stamp does not match it returns
# undef / the empty list.
sub _build_parsestamp {
  ##my ($class, $name, $arg, $col) = @_;
  my ( undef, $name, $arg, undef ) = @_;

  # pre-compile the regexp
  my $regexp = exists $arg->{regexp}
    ? qr/$arg->{regexp}/
    : qr/^ (\d{4}) \D* (\d{2}) \D* (\d{2}) \D*
           (\d{2}) \D* (\d{2}) \D* (\d{2}) (?:\.(\d+))? .* $/x;

  require Time::Local; # core
  my $time = $name eq 'parsegm'
    ? \&Time::Local::timegm
    : \&Time::Local::timelocal;

  return sub {
    my ($stamp) = @_;
    # captures are year..second(,fraction); reversed they are in the order
    # Time::Local expects, with the last capture peeled off as the fraction
    my ($frac, @time) = reverse ($stamp =~ $regexp);

    # if the regexp didn't match (empty list) give up now
    return
      if !@time;

    # regexp didn't have 7th capture group (for fraction)
    if( @time < 6 ){
      # what was peeled off was really the seconds: put it back
      unshift @time, $frac;
      # if there was a fraction in group 6 separate it
      # or timelocal may produce something unexpected.
      # if there was no fraction $frac will be undef
      ($time[0], $frac) = split(/\./, $time[0]);
    }

    # coerce strings into numbers (map { int } would not work for fractions)
    @time = map { $_ + 0 } @time;

    $time[5] -= 1900; # year
    $time[4] -= 1;    # month

    # make sure it starts with a dot (whether it has one or not).
    # only an actual "0." or "." prefix is replaced: a bare run of digits
    # such as "05" must keep its leading zero (.05, not .5)
    $frac =~ s/^(?:0?\.)?/./
      if defined $frac;

    if( wantarray ){
      $time[0] .= $frac
        if defined $frac;
      return @time;
    }
    else {
      my $ts = $time->(@time);
      $ts .= $frac
        if defined $frac;
      return $ts;
    }
  };
}
|
205
|
+
|
206
|
+
# Build the sprintf() pattern for a stamp.
#   $arg - hashref of options; 'format' names a predefined format and any
#          of the separator/tz/frac keys override that format's values
# Returns the pattern string: six numeric placeholders (year .. seconds),
# an optional '%s' for the fraction and an optional time zone suffix.
sub _format {
  my ($arg) = @_;

  my $name = $arg->{format} || ''; # avoid undef
  # we could return $arg->{format} unless exists $formats->{$name}; warn if no % found?
  # or just return $arg->{sprintf} if exists $arg->{sprintf};
  # unknown (or missing) names fall back to the default format
  $name = 'default'
    if !exists $formats->{$name};

  # start with named format, overwrite with any explicitly specified options
  my %opt = (%{ $formats->{$name} }, %$arg);

  # TODO: $opt{tz} = tz_offset() if $opt{guess_tz};

  # sadly "%02.6f" does not zero-pad the integer portion, so we have to be trickier
  # (the fraction is a separate '%s' placeholder)
  my $date = join($opt{date_sep}, qw(%04d %02d %02d));
  my $time = join($opt{time_sep}, qw(%02d %02d %02d));
  my $frac = $opt{frac} ? '%s' : '';
  my $zone = $opt{tz} ? $opt{tz_sep} . $opt{tz} : '';

  return $date . $opt{dt_sep} . $time . $frac . $zone;
}
|
230
|
+
|
231
|
+
# convert *time() arrays to something ready to send to sprintf
|
232
|
+
# Reorder the first six elements of a localtime()/gmtime() style list
# (sec, min, hour, mday, mon, year) into (year, month, day, hour, min, sec),
# adding 1900 to the year and 1 to the month.
sub _ymdhms {
  my ($sec, $min, $hour, $mday, $mon, $year) = @_;

  return ($year + 1900, $mon + 1, $mday, $hour, $min, $sec);
}
|
235
|
+
|
236
|
+
# cached result of the load attempt: undef until tried, then 1 or 0
my $_have_hires;

# Try (once) to load Time::HiRes and report whether it is available.
# Returns 1 on success; warns and returns 0 if the module fails to load.
sub _have_hires {
  return $_have_hires
    if defined $_have_hires;

  # keep the caller's $@ intact
  local $@;
  if( eval { require Time::HiRes; 1 } ){
    $_have_hires = 1;
  }
  else {
    warn "Time::HiRes requested but failed to load: $@";
    $_have_hires = 0;
  }

  return $_have_hires;
}
|
247
|
+
|
248
|
+
# define default localstamp and gmstamp in this package
|
249
|
+
# so that exporting is not strictly required
|
250
|
+
# run this package's own import() so that Time::Stamp::localstamp() etc.
# exist with their default configuration
__PACKAGE__->import( qw(localstamp gmstamp parsegm parselocal) );
|
256
|
+
|
257
|
+
1;
|
258
|
+
|
259
|
+
|
260
|
+
__END__
|
261
|
+
=pod
|
262
|
+
|
263
|
+
=for :stopwords Randy Stauner ACKNOWLEDGEMENTS TODO timestamp gmstamp localstamp UTC
|
264
|
+
parsegm parselocal 6th 7th cpan testmatrix url annocpan anno bugtracker rt
|
265
|
+
cpants kwalitee diff irc mailto metadata placeholders metacpan
|
266
|
+
|
267
|
+
=encoding utf-8
|
268
|
+
|
269
|
+
=head1 NAME
|
270
|
+
|
271
|
+
Time::Stamp - Easy, readable, efficient timestamp functions
|
272
|
+
|
273
|
+
=head1 VERSION
|
274
|
+
|
275
|
+
version 1.200
|
276
|
+
|
277
|
+
=head1 SYNOPSIS
|
278
|
+
|
279
|
+
# import customized functions to make easy-to-use timestamps
|
280
|
+
|
281
|
+
use Time::Stamp 'gmstamp';
|
282
|
+
my $now = gmstamp();
|
283
|
+
my $mtime = gmstamp( (stat($file))[9] );
|
284
|
+
# $mtime is something like "2012-05-18T10:52:32Z"
|
285
|
+
|
286
|
+
|
287
|
+
use Time::Stamp localstamp => { -as => 'ltime', format => 'compact' };
|
288
|
+
# ltime() will return something like "20120518_105232"
|
289
|
+
|
290
|
+
use Time::Stamp -stamps => { dt_sep => ' ', date_sep => '/', us => 1 };
|
291
|
+
# localstamp() will return something like "2012/05/18 10:52:32.123456"
|
292
|
+
|
293
|
+
|
294
|
+
# inverse functions to parse the stamps
|
295
|
+
|
296
|
+
use Time::Stamp 'parsegm';
|
297
|
+
my $seconds = parsegm($stamp);
|
298
|
+
|
299
|
+
use Time::Stamp parselocal => { -as => 'parsel', regexp => qr/$pattern/ };
|
300
|
+
|
301
|
+
use Time::Stamp -parsers => { regexp => qr/$pattern/ };
|
302
|
+
|
303
|
+
|
304
|
+
# the default configurations of each function
|
305
|
+
# are available without importing into your namespace
|
306
|
+
|
307
|
+
$stamp = Time::Stamp::gmstamp($time);
|
308
|
+
$time = Time::Stamp::parsegm($stamp);
|
309
|
+
|
310
|
+
|
311
|
+
# use shortcuts for specifying desired format, useful for one-liners:
|
312
|
+
qx/perl -MTime::Stamp=local-compact -E 'say localstamp'/;
|
313
|
+
# with milliseconds:
|
314
|
+
qx/perl -MTime::Stamp=local-compact-ms -E 'say localstamp'/;
|
315
|
+
# with microseconds:
|
316
|
+
qx/perl -MTime::Stamp=local-compact-us -E 'say localstamp'/;
|
317
|
+
|
318
|
+
=head1 DESCRIPTION
|
319
|
+
|
320
|
+
This module makes it easy to include timestamp functions
|
321
|
+
that are simple, easy to read, easy to parse, and fast.
|
322
|
+
For simple timestamps perl's built-in functions are all you need:
|
323
|
+
L<time|perlfunc/time>,
|
324
|
+
L<gmtime|perlfunc/gmtime> (or L<localtime|perlfunc/localtime>),
|
325
|
+
and L<sprintf|perlfunc/sprintf>...
|
326
|
+
|
327
|
+
Sometimes you desire a simple timestamp to add to a file name
|
328
|
+
or use as part of a generated data identifier.
|
329
|
+
The fastest and easiest thing to do is call L<time()|perlfunc/time>
|
330
|
+
to get a seconds-since-epoch integer.
|
331
|
+
|
332
|
+
Sometimes you get a seconds-since-epoch integer from another function
|
333
|
+
(like L<stat()|perlfunc/stat> for instance)
|
334
|
+
and maybe you want to store that in a database or send it across the network.
|
335
|
+
|
336
|
+
This integer timestamp works for these purposes,
|
337
|
+
but it's not easy to read.
|
338
|
+
|
339
|
+
If you're looking at a list of timestamps you have to fire up a perl
|
340
|
+
interpreter and copy and paste the timestamp into
|
341
|
+
L<localtime()|perlfunc/localtime> to figure out when that actually was.
|
342
|
+
|
343
|
+
You can pass the timestamp to C<scalar localtime($sec)>
|
344
|
+
(or C<scalar gmtime($sec)>)
|
345
|
+
but that doesn't sort well or parse easily,
|
346
|
+
isn't internationally friendly,
|
347
|
+
and contains characters that aren't friendly for file names or URIs
|
348
|
+
(or other places you may want to use it).
|
349
|
+
|
350
|
+
See L<perlport/Time and Date> for more discussion on useful timestamps.
|
351
|
+
|
352
|
+
For simple timestamps you can get the data you need from
|
353
|
+
L<localtime|perlfunc/localtime> and L<gmtime|perlfunc/gmtime>
|
354
|
+
without incurring the resource cost of L<DateTime>
|
355
|
+
(or any other object for that matter).
|
356
|
+
|
357
|
+
So the aim of this module is to provide simple timestamp functions
|
358
|
+
so that you can have easy-to-use, easy-to-read timestamps efficiently.
|
359
|
+
|
360
|
+
=for test_synopsis my ( $file, $pattern, $stamp, $time );
|
361
|
+
|
362
|
+
=head1 FORMAT
|
363
|
+
|
364
|
+
For reasons listed elsewhere
|
365
|
+
the timestamps are always in order from largest unit to smallest:
|
366
|
+
year, month, day, hours, minutes, seconds
|
367
|
+
and are always two digits, except the year which is always four.
|
368
|
+
|
369
|
+
The other characters of the stamp are configurable:
|
370
|
+
|
371
|
+
=over 4
|
372
|
+
|
373
|
+
=item *
|
374
|
+
|
375
|
+
C<date_sep> - Character separating date components; Default: C<'-'>
|
376
|
+
|
377
|
+
=item *
|
378
|
+
|
379
|
+
C<dt_sep> - Character separating date and time; Default: C<'T'>
|
380
|
+
|
381
|
+
=item *
|
382
|
+
|
383
|
+
C<time_sep> - Character separating time components; Default: C<':'>
|
384
|
+
|
385
|
+
=item *
|
386
|
+
|
387
|
+
C<tz_sep> - Character separating time and timezone; Default: C<''>
|
388
|
+
|
389
|
+
=item *
|
390
|
+
|
391
|
+
C<tz> - Time zone designator; Default: C<''>
|
392
|
+
|
393
|
+
=item *
|
394
|
+
|
395
|
+
C<frac> - Digits of fractional seconds to show; Default: no fraction
|
396
|
+
|
397
|
+
=item *
|
398
|
+
|
399
|
+
C<ms> - Boolean shortcut: milliseconds; If true, same as C<< frac => 3 >>
|
400
|
+
|
401
|
+
=item *
|
402
|
+
|
403
|
+
C<us> - Boolean shortcut: microseconds; If true, same as C<< frac => 6 >>
|
404
|
+
|
405
|
+
=back
|
406
|
+
|
407
|
+
The following formats are predefined:
|
408
|
+
|
409
|
+
default => see above descriptions
|
410
|
+
iso8601 => \%default
|
411
|
+
rfc3339 => \%default
|
412
|
+
w3cdtf => \%default
|
413
|
+
"2010-01-02T13:14:15" # local
|
414
|
+
"2010-01-02T13:14:15Z" # gm
|
415
|
+
|
416
|
+
easy => like default but with a space as dt_sep and tz_sep (easier to read)
|
417
|
+
"2010-01-02 13:14:15" # local
|
418
|
+
"2010-01-02 13:14:15 Z" # gm
|
419
|
+
|
420
|
+
compact => condense date and time components and set dt_sep to '_'
|
421
|
+
"20100102_131415" # local
|
422
|
+
"20100102_131415Z" # gm
|
423
|
+
|
424
|
+
numeric => all options are '' so that only numbers remain
|
425
|
+
"20100102131415" # both
|
426
|
+
|
427
|
+
Currently there is no attempt to guess the time zone.
|
428
|
+
By default C<gmstamp> sets C<tz> to C<'Z'> (which you can override if desired).
|
429
|
+
If you are using C<gmstamp> (recommended for transmitting to another computer)
|
430
|
+
you don't need anything else. If you are using C<localstamp> you are probably
|
431
|
+
keeping the timestamp on that computer (like the stamp in a log file)
|
432
|
+
and you probably aren't concerned with time zone since it isn't likely to change.
|
433
|
+
|
434
|
+
If you want to include a time zone (other than C<'Z'> for UTC)
|
435
|
+
the standards suggest using the offset value (like C<-0700> or C<+12:00>).
|
436
|
+
If you would like to determine the time zone offset you can do something like:
|
437
|
+
|
438
|
+
use Time::Zone (); # or Time::Timezone
|
439
|
+
use Time::Stamp localstamp => { tz => Time::Zone::tz_offset() };
|
440
|
+
|
441
|
+
If, despite the recommendations, you want to use the local time zone code:
|
442
|
+
|
443
|
+
use POSIX (); # included in perl core
|
444
|
+
use Time::Stamp localstamp => { tz => POSIX::strftime('%Z', localtime) };
|
445
|
+
|
446
|
+
These options are not included in this module since they are not recommended
|
447
|
+
and introduce unnecessary overhead (loading the aforementioned modules).
|
448
|
+
|
449
|
+
=head1 EXPORTS
|
450
|
+
|
451
|
+
This module uses L<Sub::Exporter>
|
452
|
+
to enable you to customize your timestamp function
|
453
|
+
but still create it as easily as possible.
|
454
|
+
|
455
|
+
The customizations are done at import
|
456
|
+
and stored in the custom function returned
|
457
|
+
to make the resulting function as fast as possible.
|
458
|
+
|
459
|
+
The following groups and functions are available for export
|
460
|
+
(nothing is exported by default):
|
461
|
+
|
462
|
+
=head2 -stamps
|
463
|
+
|
464
|
+
This is a convenience group for importing both L</gmstamp> and L</localstamp>.
|
465
|
+
|
466
|
+
Each timestamp export accepts any of the keys listed in L</FORMAT>
|
467
|
+
as well as C<format> which can be the name of a predefined format.
|
468
|
+
|
469
|
+
use Time::Stamp '-stamps';
|
470
|
+
use Time::Stamp -stamps => { format => 'compact' };
|
471
|
+
|
472
|
+
use Time::Stamp gmstamp => { dt_sep => ' ', tz => ' UTC' };
|
473
|
+
|
474
|
+
use Time::Stamp localstamp => { -as => 'shorttime', format => 'compact' };
|
475
|
+
|
476
|
+
Each timestamp function will return a string according to the time as follows:
|
477
|
+
|
478
|
+
=over 4
|
479
|
+
|
480
|
+
=item *
|
481
|
+
|
482
|
+
If called with no arguments C<time()> (I<now>) will be used
|
483
|
+
|
484
|
+
(or L<Time::HiRes/gettimeofday> for fractional seconds).
|
485
|
+
|
486
|
+
=item *
|
487
|
+
|
488
|
+
A single argument should be an integer
|
489
|
+
(like that returned from C<time()> or C<stat()>).
|
490
|
+
|
491
|
+
If a floating point number is provided
|
492
|
+
(and fractional seconds were part of the format)
|
493
|
+
the fraction will be preserved (according to the specified precision).
|
494
|
+
|
495
|
+
=item *
|
496
|
+
|
497
|
+
More than one argument is assumed to be the list returned from
|
498
|
+
C<gmtime()> or C<localtime()> which can be useful if you previously called
|
499
|
+
the function and don't want to do it again.
|
500
|
+
|
501
|
+
If the first argument (seconds) is a floating point number
|
502
|
+
(and fractional seconds were part of the format)
|
503
|
+
the fraction will be preserved (according to the specified precision).
|
504
|
+
|
505
|
+
=back
|
506
|
+
|
507
|
+
Most commonly the 0 or 1 argument form would be used,
|
508
|
+
but the shortcut of using a time array is provided
|
509
|
+
in case you already have the array so that you don't have to use
|
510
|
+
L<Time::Local> just to get the integer back.
|
511
|
+
|
512
|
+
=head2 gmstamp
|
513
|
+
|
514
|
+
$stamp = gmstamp(); # equivalent to gmstamp(time())
|
515
|
+
$stamp = gmstamp($seconds);
|
516
|
+
$stamp = gmstamp(@gmtime);
|
517
|
+
|
518
|
+
This returns a string according to the format specified in the import call.
|
519
|
+
|
520
|
+
By default this function sets C<tz> to C<'Z'>
|
521
|
+
since C<gmtime()> returns values in C<UTC> (no time zone offset).
|
522
|
+
|
523
|
+
This is the recommended stamp as it is by default unambiguous
|
524
|
+
and useful for transmitting to another computer.
|
525
|
+
|
526
|
+
=head2 localstamp
|
527
|
+
|
528
|
+
$stamp = localstamp(); # equivalent to localstamp(time())
|
529
|
+
$stamp = localstamp($seconds);
|
530
|
+
$stamp = localstamp(@localtime);
|
531
|
+
|
532
|
+
This returns a string according to the format specified in the import call.
|
533
|
+
|
534
|
+
By default this function does not include a time zone indicator.
|
535
|
+
|
536
|
+
This function can be useful for log files or other values that stay
|
537
|
+
on the machine where time zone is not important and/or is constant.
|
538
|
+
|
539
|
+
=head2 -parsers
|
540
|
+
|
541
|
+
This is a convenience group for importing both L</parsegm> and L</parselocal>.
|
542
|
+
|
543
|
+
use Time::Stamp '-parsers';
|
544
|
+
use Time::Stamp -parsers => { regexp => qr/pattern/ };
|
545
|
+
|
546
|
+
use Time::Stamp 'parsegm';
|
547
|
+
|
548
|
+
use Time::Stamp parselocal => { -as => 'parsestamp', regexp => qr/pattern/ };
|
549
|
+
|
550
|
+
The parser functions are the inverse of the stamp functions.
|
551
|
+
They accept a timestamp and use the appropriate function from L<Time::Local>
|
552
|
+
to turn it back into a seconds-since-epoch integer.
|
553
|
+
|
554
|
+
In list context they return the list that would have been sent to L<Time::Local>
|
555
|
+
which is similar to the one returned by
|
556
|
+
L<gmtime|perlfunc/gmtime> and L<localtime|perlfunc/localtime>:
|
557
|
+
seconds, minutes, hours, day, month (0-11), year (-1900).
|
558
|
+
B<NOTE> that the C<wday>, C<yday>, and C<isdst> parameters
|
559
|
+
(the last three elements returned from C<localtime> or C<gmtime>)
|
560
|
+
are not returned because they are not easily determined from the stamp.
|
561
|
+
Besides L<Time::Local> only takes the first 6 anyway.
|
562
|
+
|
563
|
+
If the stamp doesn't match the pattern
|
564
|
+
the function will return undef in scalar context
|
565
|
+
or an empty list in list context.
|
566
|
+
|
567
|
+
An alternate regular expression can be supplied as the C<regexp> parameter
|
568
|
+
during import. The default pattern will match any of the named formats.
|
569
|
+
|
570
|
+
The pattern must capture 6 groups in the appropriate order:
|
571
|
+
year, month, day, hour, minute, second.
|
572
|
+
If you're doing something more complex you probably ought to be using
|
573
|
+
one of the modules listed in L</SEE ALSO>.
|
574
|
+
|
575
|
+
An optional 7th group can be used to capture the fractional seconds.
|
576
|
+
If only 6 groups are used, the 6th capture (seconds)
|
577
|
+
will be checked for a fraction.
|
578
|
+
The fraction will be separated from the whole number
|
579
|
+
before being passed through the L<Time::Local> functions
|
580
|
+
then appended to the result
|
581
|
+
(the number returned in scalar context,
|
582
|
+
or to the first element returned in list context)
|
583
|
+
in an attempt to provide the most expected/reliable result.
|
584
|
+
|
585
|
+
=head2 parsegm
|
586
|
+
|
587
|
+
$seconds = parsegm($stamp);
|
588
|
+
@gmtime = parsegm($stamp);
|
589
|
+
|
590
|
+
This is the inverse of L</gmstamp>.
|
591
|
+
It parses a timestamp (like the ones created by this module) and uses
|
592
|
+
L<Time::Local/timegm> to turn it back into a seconds-since-epoch integer.
|
593
|
+
|
594
|
+
=head2 parselocal
|
595
|
+
|
596
|
+
$seconds = parselocal($stamp);
|
597
|
+
@localtime = parselocal($stamp);
|
598
|
+
|
599
|
+
This is the inverse of L</localstamp>.
|
600
|
+
It parses a timestamp (like the ones created by this module) and uses
|
601
|
+
L<Time::Local/timelocal> to turn it back into a seconds-since-epoch integer.
|
602
|
+
|
603
|
+
=head2 SHORTCUTS
|
604
|
+
|
605
|
+
There are also shortcuts available in the format of C<< type-format >>
|
606
|
+
that export the appropriate function using the named format.
|
607
|
+
|
608
|
+
For example:
|
609
|
+
|
610
|
+
=over 4
|
611
|
+
|
612
|
+
=item *
|
613
|
+
|
614
|
+
C<local-compact> exports a L</localstamp> function using the C<compact> format
|
615
|
+
|
616
|
+
=item *
|
617
|
+
|
618
|
+
C<gm-easy> exports a L</gmstamp> function using the C<easy> format
|
619
|
+
|
620
|
+
=back
|
621
|
+
|
622
|
+
This makes the module easier to use on the command line:
|
623
|
+
|
624
|
+
perl -MTime::Stamp=local-compact -E 'say localstamp'
|
625
|
+
|
626
|
+
Rather than:
|
627
|
+
|
628
|
+
perl -E 'use Time::Stamp localstamp => { format => "compact" }; say localstamp'
|
629
|
+
|
630
|
+
Any of the predefined formats named in L</FORMAT>
|
631
|
+
can be used in the shortcut notation.
|
632
|
+
|
633
|
+
Additionally recognized flags include:
|
634
|
+
|
635
|
+
=over 4
|
636
|
+
|
637
|
+
=item *
|
638
|
+
|
639
|
+
C<us> adds microseconds (6 digit precision): C<< local-easy-us >>
|
640
|
+
|
641
|
+
=item *
|
642
|
+
|
643
|
+
C<ms> adds milliseconds (3 digit precision): C<< gm-ms >>
|
644
|
+
|
645
|
+
=back
|
646
|
+
|
647
|
+
=head1 SEE ALSO
|
648
|
+
|
649
|
+
=over 4
|
650
|
+
|
651
|
+
=item *
|
652
|
+
|
653
|
+
L<perlport/Time and Date> - discussion on using portable, readable timestamps
|
654
|
+
|
655
|
+
=item *
|
656
|
+
|
657
|
+
L<perlfunc/localtime> - built-in function
|
658
|
+
|
659
|
+
=item *
|
660
|
+
|
661
|
+
L<perlfunc/gmtime> - built-in function
|
662
|
+
|
663
|
+
=item *
|
664
|
+
|
665
|
+
L<Timestamp::Simple> - small, less efficient, non-customizable stamp
|
666
|
+
|
667
|
+
=item *
|
668
|
+
|
669
|
+
L<Time::Piece> - object-oriented module for working with times
|
670
|
+
|
671
|
+
=item *
|
672
|
+
|
673
|
+
L<DateTime::Tiny> - object-oriented module "with as little code as possible"
|
674
|
+
|
675
|
+
=item *
|
676
|
+
|
677
|
+
L<DateTime> - large, powerful object-oriented system
|
678
|
+
|
679
|
+
=item *
|
680
|
+
|
681
|
+
L<Time::localtime> - small object-oriented/named interface to C<localtime()>
|
682
|
+
|
683
|
+
=item *
|
684
|
+
|
685
|
+
L<Time::gmtime> - small object-oriented/named interface to C<gmtime()>
|
686
|
+
|
687
|
+
=item *
|
688
|
+
|
689
|
+
L<POSIX> - large module containing standard methods including C<strftime()>
|
690
|
+
|
691
|
+
=item *
|
692
|
+
|
693
|
+
L<http://www.cl.cam.ac.uk/~mgk25/iso-time.html> - summary of C<ISO 8601>
|
694
|
+
|
695
|
+
=item *
|
696
|
+
|
697
|
+
L<http://www.w3.org/TR/NOTE-datetime> - C<W3CDTF> profile of C<ISO 8601>
|
698
|
+
|
699
|
+
=item *
|
700
|
+
|
701
|
+
L<http://www.ietf.org/rfc/rfc3339.txt> - C<RFC3339> profile of C<ISO 8601>
|
702
|
+
|
703
|
+
=back
|
704
|
+
|
705
|
+
=head1 TODO
|
706
|
+
|
707
|
+
=over 4
|
708
|
+
|
709
|
+
=item *
|
710
|
+
|
711
|
+
Allow an option for overwriting the globals
|
712
|
+
so that calling C<localtime> in scalar context will return
|
713
|
+
a stamp in the desired format.
|
714
|
+
The normal values will be returned in list context.
|
715
|
+
|
716
|
+
=back
|
717
|
+
|
718
|
+
=head1 SUPPORT
|
719
|
+
|
720
|
+
=head2 Perldoc
|
721
|
+
|
722
|
+
You can find documentation for this module with the perldoc command.
|
723
|
+
|
724
|
+
perldoc Time::Stamp
|
725
|
+
|
726
|
+
=head2 Websites
|
727
|
+
|
728
|
+
The following websites have more information about this module, and may be of help to you. As always,
|
729
|
+
in addition to those websites please use your favorite search engine to discover more resources.
|
730
|
+
|
731
|
+
=over 4
|
732
|
+
|
733
|
+
=item *
|
734
|
+
|
735
|
+
Search CPAN
|
736
|
+
|
737
|
+
The default CPAN search engine, useful to view POD in HTML format.
|
738
|
+
|
739
|
+
L<http://search.cpan.org/dist/Time-Stamp>
|
740
|
+
|
741
|
+
=item *
|
742
|
+
|
743
|
+
RT: CPAN's Bug Tracker
|
744
|
+
|
745
|
+
The RT ( Request Tracker ) website is the default bug/issue tracking system for CPAN.
|
746
|
+
|
747
|
+
L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=Time-Stamp>
|
748
|
+
|
749
|
+
=item *
|
750
|
+
|
751
|
+
CPAN Ratings
|
752
|
+
|
753
|
+
The CPAN Ratings is a website that allows community ratings and reviews of Perl modules.
|
754
|
+
|
755
|
+
L<http://cpanratings.perl.org/d/Time-Stamp>
|
756
|
+
|
757
|
+
=item *
|
758
|
+
|
759
|
+
CPAN Testers
|
760
|
+
|
761
|
+
The CPAN Testers is a network of smokers who run automated tests on uploaded CPAN distributions.
|
762
|
+
|
763
|
+
L<http://www.cpantesters.org/distro/T/Time-Stamp>
|
764
|
+
|
765
|
+
=item *
|
766
|
+
|
767
|
+
CPAN Testers Matrix
|
768
|
+
|
769
|
+
The CPAN Testers Matrix is a website that provides a visual overview of the test results for a distribution on various Perls/platforms.
|
770
|
+
|
771
|
+
L<http://matrix.cpantesters.org/?dist=Time-Stamp>
|
772
|
+
|
773
|
+
=item *
|
774
|
+
|
775
|
+
CPAN Testers Dependencies
|
776
|
+
|
777
|
+
The CPAN Testers Dependencies is a website that shows a chart of the test results of all dependencies for a distribution.
|
778
|
+
|
779
|
+
L<http://deps.cpantesters.org/?module=Time::Stamp>
|
780
|
+
|
781
|
+
=back
|
782
|
+
|
783
|
+
=head2 Bugs / Feature Requests
|
784
|
+
|
785
|
+
Please report any bugs or feature requests by email to C<bug-time-stamp at rt.cpan.org>, or through
|
786
|
+
the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Time-Stamp>. You will be automatically notified of any
|
787
|
+
progress on the request by the system.
|
788
|
+
|
789
|
+
=head2 Source Code
|
790
|
+
|
791
|
+
|
792
|
+
L<https://github.com/rwstauner/Time-Stamp>
|
793
|
+
|
794
|
+
git clone https://github.com/rwstauner/Time-Stamp.git
|
795
|
+
|
796
|
+
=head1 AUTHOR
|
797
|
+
|
798
|
+
Randy Stauner <rwstauner@cpan.org>
|
799
|
+
|
800
|
+
=head1 COPYRIGHT AND LICENSE
|
801
|
+
|
802
|
+
This software is copyright (c) 2011 by Randy Stauner.
|
803
|
+
|
804
|
+
This is free software; you can redistribute it and/or modify it under
|
805
|
+
the same terms as the Perl 5 programming language system itself.
|
806
|
+
|
807
|
+
=cut
|
808
|
+
|