wukong 1.5.3 → 1.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. data/CHANGELOG.textile +4 -0
  2. data/bin/hdp-bin +44 -0
  3. data/bin/hdp-ls +2 -1
  4. data/docpages/avro/performance.textile +36 -0
  5. data/examples/cassandra_streaming/avromapper.rb +85 -0
  6. data/examples/cassandra_streaming/berlitz_for_cassandra.textile +22 -0
  7. data/examples/cassandra_streaming/cassandra.avpr +468 -0
  8. data/examples/cassandra_streaming/cassandra_random_partitioner.rb +62 -0
  9. data/examples/cassandra_streaming/catter.sh +45 -0
  10. data/examples/cassandra_streaming/client_interface_notes.textile +200 -0
  11. data/examples/cassandra_streaming/client_schema.avpr +211 -0
  12. data/examples/cassandra_streaming/client_schema.textile +318 -0
  13. data/examples/cassandra_streaming/foofile.avr +0 -0
  14. data/examples/cassandra_streaming/pymap.sh +1 -0
  15. data/examples/cassandra_streaming/pyreduce.sh +1 -0
  16. data/examples/cassandra_streaming/smutation.avpr +188 -0
  17. data/examples/cassandra_streaming/streamer.sh +51 -0
  18. data/examples/cassandra_streaming/struct_loader.rb +24 -0
  19. data/examples/cassandra_streaming/tuning.textile +73 -0
  20. data/examples/emr/README-elastic_map_reduce.textile +26 -0
  21. data/examples/emr/dot_wukong_dir/credentials.json +7 -0
  22. data/examples/emr/{emr.yaml → dot_wukong_dir/emr.yaml} +33 -16
  23. data/{bin/bootstrap.sh → examples/emr/dot_wukong_dir/emr_bootstrap.sh} +1 -1
  24. data/examples/emr/elastic_mapreduce_example.rb +1 -0
  25. data/lib/wukong/encoding/asciize.rb +108 -0
  26. data/lib/wukong/extensions/date_time.rb +33 -7
  27. data/lib/wukong/extensions/emittable.rb +12 -25
  28. data/lib/wukong/extensions/hash_like.rb +13 -6
  29. data/lib/wukong/filename_pattern.rb +8 -7
  30. data/lib/wukong/schema.rb +47 -0
  31. data/lib/wukong/script.rb +7 -0
  32. data/lib/wukong/script/cassandra_loader_script.rb +40 -0
  33. data/lib/wukong/script/emr_command.rb +74 -43
  34. data/lib/wukong/script/hadoop_command.rb +89 -72
  35. data/lib/wukong/store.rb +2 -7
  36. data/lib/wukong/store/cassandra.rb +10 -0
  37. data/lib/wukong/store/cassandra/streaming.rb +75 -0
  38. data/lib/wukong/store/cassandra/struct_loader.rb +21 -0
  39. data/lib/wukong/store/cassandra_model.rb +90 -0
  40. data/lib/wukong/store/chh_chunked_flat_file_store.rb +1 -1
  41. data/lib/wukong/store/chunked_flat_file_store.rb +24 -20
  42. data/wukong.gemspec +32 -4
  43. metadata +33 -14
@@ -0,0 +1,51 @@
1
#!/usr/bin/env bash
#
# Launch a map-only Hadoop streaming job that writes its output into Cassandra
# through org.apache.cassandra.hadoop.ColumnFamilyOutputFormat, using
# avromapper.rb (found next to this script) as the mapper.
#
# Usage:
#   streamer.sh input_path output_path [map_script [reduce_script [hadoop args...]]]
#
# Environment overrides (defaults in parentheses):
#   dest_keyspace             (soc_net_tw)
#   dest_col_family           (Wordbag)
#   CASSANDRA_HOME            (/usr/local/share/cassandra)
#   HADOOP_HOME               (/usr/lib/hadoop)
#   avro_file                 ($CASSANDRA_HOME/interface/avro/cassandra.avpr)
#   cassandra_thrift_address  (the comma-separated node list set below)

if [ $# -lt 2 ]; then
  echo "usage: $0 input_path output_path [map_script [reduce_script [hadoop args...]]]" >&2
  exit 1
fi

input_file="$1"  ; shift
output_file="$1" ; shift
# map_script and reduce_script are consumed positionally so that any remaining
# arguments still reach hadoop via "$@". The job below always runs
# avromapper.rb as its mapper and requests zero reduce tasks.
map_script=${1-/bin/cat}         ; shift
reduce_script=${1-/usr/bin/uniq} ; shift

dest_keyspace=${dest_keyspace-soc_net_tw}
dest_col_family=${dest_col_family-Wordbag}

# Comma-separated Cassandra nodes to contact over thrift; override from the
# environment instead of editing this script.
cassandra_thrift_address=${cassandra_thrift_address-10.204.41.193,10.204.30.11,10.204.58.238,10.204.239.133,10.196.191.31,10.204.103.21,10.202.74.223,10.202.143.95}

# Path to cassandra and hadoop dirs
script_dir=$(readlink -f "$(dirname "$0")")
CASSANDRA_HOME=${CASSANDRA_HOME-/usr/local/share/cassandra}
HADOOP_HOME=${HADOOP_HOME-/usr/lib/hadoop}
avro_file=${avro_file-$CASSANDRA_HOME/interface/avro/cassandra.avpr}

# Build the comma-separated -libjars list from shell globs rather than from
# the output of ls, so paths containing spaces or several matching jars are
# handled correctly.
ARCHIVES=
for jar in "$CASSANDRA_HOME"/build/apache-cassandra*.jar \
           "$CASSANDRA_HOME"/build/lib/jars/*.jar \
           "$CASSANDRA_HOME"/lib/*.jar ; do
  [ -e "$jar" ] || continue   # an unmatched glob is left as literal text; skip it
  ARCHIVES="${ARCHIVES:+$ARCHIVES,}$jar"
done

"${HADOOP_HOME}/bin/hadoop" \
  jar "${HADOOP_HOME}"/contrib/streaming/hadoop-*streaming*.jar \
  -D stream.map.output=cassandra_avro_output \
  -D stream.io.identifier.resolver.class=org.apache.cassandra.hadoop.streaming.AvroResolver \
  -D cassandra.output.keyspace="$dest_keyspace" \
  -D cassandra.output.columnfamily="$dest_col_family" \
  -D cassandra.thrift.address="$cassandra_thrift_address" \
  -D cassandra.partitioner.class=org.apache.cassandra.dht.RandomPartitioner \
  -D cassandra.thrift.port=9160 \
  -D mapreduce.output.columnfamilyoutputformat.batch.threshold=1024 \
  -D mapred.reduce.tasks=0 \
  -D mapred.map.tasks.speculative.execution=false \
  -libjars "$ARCHIVES" \
  -file "$avro_file" \
  -outputformat org.apache.cassandra.hadoop.ColumnFamilyOutputFormat \
  -mapper "ruby $script_dir/avromapper.rb --map " \
  -input "$input_file" \
  -output "$output_file" \
  "$@"

# Node lists used with other clusters:
# cassandra_thrift_address=10.204.54.190,10.244.42.31,10.244.42.176,10.244.42.112,10.244.42.143,10.244.42.79,10.244.42.4,10.204.53.166
# cassandra_thrift_address=10.204.221.230,10.243.79.223,10.245.19.159,10.242.154.159,10.242.153.155,10.242.153.203

# One-liner that turns EC2 internal hostnames from a log into such a list:
# cat /tmp/mj-flip/chimchim-info.log | cut -f5 | ruby -e 'puts $stdin.readlines.map{|l| l.chomp.gsub(/ip-([0-9\-]+)\..*/,"\\1").gsub(/-/,".") }.join(",")'
49
+
50
+
51
+
@@ -0,0 +1,24 @@
1
+ #!/usr/bin/env ruby
2
+ require 'rubygems'
3
+ require 'wukong'
4
+ require 'wukong/periodic_monitor'
5
+ require 'wukong/store/cassandra'
6
+ require 'wukong/script/cassandra_loader_script'
7
+
8
+ Settings.use :commandline
9
+ Settings.define :log_interval, :default => 1
10
+ Settings.cassandra_keyspace = 'soc_net_tw'
11
+ Settings.cassandra_col_family = 'TwitterUser'
12
+ Settings.cassandra_hosts = "ip-10-204-41-193.ec2.internal:9160,ip-10-204-30-11.ec2.internal:9160,ip-10-204-58-238.ec2.internal:9160,ip-10-204-239-133.ec2.internal:9160,ip-10-196-191-31.ec2.internal:9160,ip-10-204-103-21.ec2.internal:9160,ip-10-202-74-223.ec2.internal:9160,ip-10-202-143-95.ec2.internal:9160"
13
+ Settings.resolve!
14
+
15
+ require 'cassandra/0.7'
16
+ require 'wuclan/twitter' ; include Wuclan::Twitter
17
+ require 'wuclan/twitter/cassandra_db'
18
+ require 'wukong/store/cassandra/streaming'
19
+
20
+ # hdp-catd s3://s3hdfs.infinitemonkeys.info/data/sn/tw/fixd/objects/twitter_user | head
21
+
22
+ # CassandraScript.new(Wukong::Store::Cassandra::StructLoader, nil).run
23
+ Wukong::CassandraScript.new(Wukong::Store::Cassandra::StructLoader, nil).run
24
+
@@ -0,0 +1,73 @@
1
+
2
+
3
+ Start
4
+
5
+ 5 c1.xlarge
6
+ 2000 writes/sec
7
+ 40 clients
8
+
9
+ 4 m2.xlarge
10
+
11
+ :java_max_heap => "12500M", #
12
+ # :flush_data_buffer_size => 32, # 32,
13
+ # :flush_index_buffer_size => 8, # 8,
14
+ # :binary_memtable_throughput => 256, # 256,
15
+ # :memtable_flush_after => 60, # 60,
16
+ # :memtable_throughput => 64, # 64,
17
+ # :memtable_ops => 0.3, # 0.3,
18
+ # :column_index_size => 64, # 64,
19
+ # :in_memory_compaction_limit => 64 # 64
20
+ :concurrent_reads => 8, # 8
21
+ :concurrent_writes => 250, # 32
22
+
23
+ /usr/bin/java -ea \
24
+ -Xms128M \
25
+ -Xmx12500M \
26
+ -XX:TargetSurvivorRatio=90 \
27
+ -XX:+AggressiveOpts \
28
+ -XX:+UseParNewGC \
29
+ -XX:+UseConcMarkSweepGC \
30
+ -XX:+CMSParallelRemarkEnabled \
31
+ -XX:+HeapDumpOnOutOfMemoryError \
32
+ -XX:SurvivorRatio=128 \
33
+ -XX:MaxTenuringThreshold=0 \
34
+ -Djava.rmi.server.hostname=ec2-184-73-20-37.compute-1.amazonaws.com \
35
+ -Dcom.sun.management.jmxremote.port=12345 \
36
+ -Dcom.sun.management.jmxremote.ssl=false \
37
+ -Dcom.sun.management.jmxremote.authenticate=false \
38
+ -Dcassandra \
39
+ -Dstorage-config=/etc/cassandra \
40
+ -Dcassandra-foreground=yes \
41
+ -cp /etc/cassandra:/usr/local/share/cassandra/build/classes:/usr/local/share/cassandra/lib/antlr-3.1.3.jar:/usr/local/share/cassandra/lib/avro-1.3.3-sources~cust1.jar:/usr/local/share/cassandra/lib/avro-1.3.3~cust2.jar:/usr/local/share/cassandra/lib/clhm-production.jar:/usr/local/share/cassandra/lib/commons-cli-1.1.jar:/usr/local/share/cassandra/lib/commons-codec-1.2.jar:/usr/local/share/cassandra/lib/commons-collections-3.2.1.jar:/usr/local/share/cassandra/lib/commons-lang-2.4.jar:/usr/local/share/cassandra/lib/guava-r05.jar:/usr/local/share/cassandra/lib/hadoop-core-0.20.1.jar:/usr/local/share/cassandra/lib/high-scale-lib.jar:/usr/local/share/cassandra/lib/jackson-core-asl-1.4.0.jar:/usr/local/share/cassandra/lib/jackson-mapper-asl-1.4.0.jar:/usr/local/share/cassandra/lib/jetty-6.1.21.jar:/usr/local/share/cassandra/lib/jetty-util-6.1.21.jar:/usr/local/share/cassandra/lib/jline-0.9.94.jar:/usr/local/share/cassandra/lib/json-simple-1.1.jar:/usr/local/share/cassandra/lib/jug-2.0.0.jar:/usr/local/share/cassandra/lib/libthrift-r959516.jar:/usr/local/share/cassandra/lib/log4j-1.2.16.jar:/usr/local/share/cassandra/lib/servlet-api-2.5-20081211.jar:/usr/local/share/cassandra/lib/slf4j-api-1.5.8.jar:/usr/local/share/cassandra/lib/slf4j-log4j12-1.5.8.jar:/usr/local/share/cassandra/lib/snakeyaml-1.6.jar\
42
+ org.apache.cassandra.thrift.CassandraDaemon
43
+
44
+
45
+ avg-cpu: %user %nice %system %iowait %steal %idle
46
+ 81.83 0.00 1.96 0.00 0.00 16.21
47
+
48
+ Device: tps Blk_read/s Blk_wrtn/s Blk_read Blk_wrtn
49
+ sda1 155.12 9.45 11450.39 48 58168
50
+ sdb 2.76 0.00 22.05 0 112
51
+
52
+ avg-cpu: %user %nice %system %iowait %steal %idle
53
+ 83.72 0.00 3.80 0.20 0.00 12.29
54
+
55
+ Device: tps Blk_read/s Blk_wrtn/s Blk_read Blk_wrtn
56
+ sda1 66.53 1.59 3921.91 8 19688
57
+ sdb 100.20 0.00 6686.85 0 33568
58
+
59
+ avg-cpu: %user %nice %system %iowait %steal %idle
60
+ 66.40 0.00 5.00 0.80 0.40 27.40
61
+
62
+ Device: tps Blk_read/s Blk_wrtn/s Blk_read Blk_wrtn
63
+ sda1 2.40 0.00 19.20 0 96
64
+ sdb 186.80 0.00 15318.40 0 76592
65
+
66
+ avg-cpu: %user %nice %system %iowait %steal %idle
67
+ 80.98 0.00 6.08 1.99 0.00 10.96
68
+
69
+ Device: tps Blk_read/s Blk_wrtn/s Blk_read Blk_wrtn
70
+ sda1 113.97 0.00 7426.75 0 37208
71
+ sdb 360.28 1.60 29232.73 8 146456
72
+
73
+
@@ -0,0 +1,26 @@
1
+
2
+ # Download the Amazon elastic-mapreduce runner from http://elasticmapreduce.s3.amazonaws.com/elastic-mapreduce-ruby.zip
3
+
4
+ # Create a bucket and path to hold your EMR logs, scripts and other ephemera. For instance you might choose 'emr.yourdomain.com' as the bucket and '/wukong' as a scoping path within that bucket. In that case you will refer to it with a path like s3n://emr.yourdomain.com/wukong (see notes below about s3n:// vs. s3:// URLs).
5
+
6
+ # Copy the contents of wukong/examples/emr/dot_wukong_dir to ~/.wukong
7
+ # Edit emr.yaml -- it has instructions for each of the settings you need to fill in.
8
+
9
+
10
+
11
+
12
+
13
+ h3. s3n:// vs. s3:// URLs
14
+
15
+ Many external tools use a URI convention to address files in S3; they typically use the 's3://' scheme, which makes a lot of sense:
16
+ s3://emr.yourcompany.com/wukong/happy_job_1/logs/whatever-20100808.log
17
+
18
+ Hadoop can maintain an HDFS on Amazon S3: it uses a block structure and has optimizations for streaming, no file size limitation, and other goodness. However, only hadoop tools can interpret the contents of those blocks -- to everything else it just looks like a soup of blocks labelled block_-8675309 and so forth. Hadoop unfortunately chose the 's3://' scheme for URIs in this filesystem:
19
+ s3://s3hdfs.yourcompany.com/path/to/data
20
+
21
+ Hadoop is happy to read s3 native files -- 'native' as in, you can look at them with a browser and upload them and download them with any S3 tool out there. There's a 5GB limit on file size, and in some cases a performance hit (but not in our experience enough to worry about). You refer to these files with the 's3n://' scheme ('n' as in 'native'):
22
+ s3n://emr.yourcompany.com/wukong/happy_job_1/code/happy_job_1-mapper.rb
23
+ s3n://emr.yourcompany.com/wukong/happy_job_1/code/happy_job_1-reducer.rb
24
+ s3n://emr.yourcompany.com/wukong/happy_job_1/logs/whatever-20100808.log
25
+
26
+ Wukong will coerce things to the right scheme when it knows what that scheme should be (eg. code should be s3n://). It will otherwise leave the path alone. Specifically, if you use a URI scheme for input and output paths you must use 's3n://' for normal s3 files.
@@ -0,0 +1,7 @@
1
+ {
2
+ "key-pair": "gibbon",
3
+ "key-pair-file": "/home/your/.wukong/keypairs/gibbon.pem",
4
+ "access-id": "YOURACCESSID",
5
+ "private-key": "YOURPRIVATEKEY",
6
+ "region": "us-east-1"
7
+ }
@@ -2,51 +2,68 @@
2
2
  # Elastic MapReduce config in wukong
3
3
  #
4
4
 
5
+ # ===========================================================================
5
6
  #
6
7
  # Infrastructure options
7
8
  #
8
9
 
9
- # == Fill all your information into yet another file with your amazon key Sorry
10
- # that it needs to be in so many stupid places, nobody can agree on a
10
+ # == Fill all your information into yet another file with your amazon key
11
+ # It needs to be in so many stupid places because nobody can agree on a
11
12
  # filename or format.
13
+ #
12
14
  :emr_credentials_file: ~/.wukong/credentials.json
15
+
13
16
  #
14
- # == Set the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY env vars, or enter them here:
17
+ # == Set the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY env vars, or enter them here
18
+ #
15
19
  # :access_key: ASDFAHKHASDF
16
20
  # :secret_access_key: ADSGHASDFJASDFASDF
21
+
22
+ # == Path to your keypair file.
17
23
  #
18
- # == Path to your keypair file.
19
24
  :key_pair_file: ~/.wukong/keypairs/gibbon.pem
20
- # == Keypair will be named after your file, or force the name:
25
+
26
+ # == Keypair will be named after your file, or force the name
27
+ #
21
28
  # :key_pair: ~
22
29
 
23
30
  # == Path to the Amazon elastic-mapreduce runner. Get a copy from
24
31
  # http://elasticmapreduce.s3.amazonaws.com/elastic-mapreduce-ruby.zip
32
+ #
25
33
  :emr_runner: ~/ics/hadoop/elastic-mapreduce/elastic-mapreduce
26
34
 
35
+ # ===========================================================================
36
+ #
37
+ # Remote Paths
38
+ #
39
+
40
+ # == Wukong is opinionated about the paths and locations of scripts and
41
+ # everything. It will organize files by job name within the following path:
42
+ #
43
+ :emr_root: s3://s3n.infinitemonkeys.info/emr
44
+
45
+ # == If you specify the :emr_data_root path, then relative pathnames -- ones that
46
+ # do not look like a URI (s3://yadda/yada) and do not start with a '/' -- will
47
+ # be prefixed with this path prefix.
48
+ :emr_data_root: s3n://s3n.infinitemonkeys.info/data
49
+
50
+
51
+ # ===========================================================================
27
52
  #
28
53
  # Cluster Config
29
54
  #
30
55
  :num_instances: 1
31
- :instance_type: m2.xlarge
56
+ :instance_type: m1.small
32
57
  :master_instance_type: ~
33
58
  :hadoop_version: '0.20'
34
59
  :availability_zone: us-east-1b
35
60
 
61
+ # ===========================================================================
36
62
  #
37
63
  # Running and reporting options
38
64
  #
39
- :alive: false
65
+ :alive: true
40
66
  :enable_debugging: true
41
67
  :emr_runner_verbose: true
42
68
  :emr_runner_debug: ~
43
69
  :step_action: CANCEL_AND_WAIT # CANCEL_AND_WAIT, TERMINATE_JOB_FLOW or CONTINUE
44
-
45
- #
46
- # Remote Paths
47
- #
48
- # Wukong is opinionated about the paths and locations of scripts and
49
- # everything. Make an S3 bucket and let the wookiee win -- or hack
50
- # lib/wukong/script/emr_command.rb to be more flexible and send us back a patch.
51
- #
52
- :emr_root: s3n://emr.infinitemonkeys.info
@@ -24,7 +24,7 @@ sudo apt-get install -y unzip build-essential git-core ruby ruby1.8-dev rubygems
24
24
  echo "`date` Unchaining rubygems from the tyrrany of ubuntu"
25
25
  sudo gem install --no-rdoc --no-ri rubygems-update --version=1.3.7 ; sudo /var/lib/gems/1.8/bin/update_rubygems; sudo gem update --no-rdoc --no-ri --system ; gem --version ;
26
26
 
27
- echo "`date` Installing wukong gems"
27
+ echo "`date` Installing wukong and related gems"
28
28
  sudo gem install --no-rdoc --no-ri addressable extlib htmlentities configliere yard wukong right_aws uuidtools cheat
29
29
  sudo gem list
30
30
 
@@ -24,4 +24,5 @@ class FooStreamer < Wukong::Streamer::LineStreamer
24
24
  end
25
25
  end
26
26
 
27
+ Settings.resolve!
27
28
  Wukong::Script.new(FooStreamer, FooStreamer).run
@@ -0,0 +1,108 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # http://www.jroller.com/obie/tags/unicode
4
+ # http://www.unicode.org/faq/casemap_charprop.html
5
+ # http://unicode.org/reports/tr10/#Conformance
6
+ # http://intertwingly.net/stories/2009/11/30/asciize.rb
7
+ # http://blog.stevenlevithan.com/archives/javascript-regex-and-unicode
8
+ #
9
+ # http://xregexp.com/tests/unicode.html
10
+
11
class String
  #
  # Ordered [pattern, replacement] rules applied by #asciize.
  #
  # Each pattern matches raw UTF-8 byte sequences (hence the /n flag, which
  # keeps the byte escapes legal on Ruby 1.9 and later). Order matters: the
  # digraph rules must run before the broader single-letter ranges that cover
  # the same bytes.
  #
  # Taken from http://intertwingly.net/stories/2009/11/30/asciize.rb
  #
  ASCIIZE_RULES = [
    # digraphs. May be culturally sensitive
    [/\xc3\x9f/n,                        'ss'],
    [/\xc3\xa4|a\xcc\x88/n,              'ae'],
    [/\xc3\xa5|a\xcc\x8a/n,              'aa'],
    [/\xc3\xa6/n,                        'ae'],
    [/\xc3\xb1|n\xcc\x83/n,              'ny'],
    [/\xc3\xb6|o\xcc\x88/n,              'oe'],
    [/\xc3\xbc|u\xcc\x88/n,              'ue'],

    # latin 1
    [/\xc3[\xa0-\xa5]/n,                 'a'],
    [/\xc3\xa7/n,                        'c'],
    [/\xc3[\xa8-\xab]/n,                 'e'],
    [/\xc3[\xac-\xaf]/n,                 'i'],
    [/\xc3[\xb2-\xb6]|\xc3\xb8/n,        'o'],
    [/\xc3[\xb9-\xbc]/n,                 'u'],
    [/\xc3[\xbd\xbf]/n,                  'y'],

    # Latin Extended-A
    [/\xc4[\x80-\x85]/n,                 'a'],
    [/\xc4[\x86-\x8d]/n,                 'c'],
    [/\xc4[\x8e-\x91]/n,                 'd'],
    [/\xc4[\x92-\x9b]/n,                 'e'],
    [/\xc4[\x9c-\xa3]/n,                 'g'],
    [/\xc4[\xa4-\xa7]/n,                 'h'],
    [/\xc4[\xa8-\xb1]/n,                 'i'],
    [/\xc4[\xb2-\xb3]/n,                 'ij'],
    [/\xc4[\xb4-\xb5]/n,                 'j'],
    [/\xc4[\xb6-\xb8]/n,                 'k'],
    [/\xc4[\xb9-\xff]|\xc5[\x80-\x82]/n, 'l'],
    [/\xc5[\x83-\x8b]/n,                 'n'],
    [/\xc5[\x8c-\x91]/n,                 'o'],
    [/\xc5[\x92-\x93]/n,                 'oe'],
    [/\xc5[\x94-\x99]/n,                 'r'],
    # U+015A..U+0161 only: U+0162 (T with cedilla, bytes c5 a2) belongs to 't'.
    [/\xc5[\x9a-\xa1]/n,                 's'],
    [/\xc5[\xa2-\xa7]/n,                 't'],
    [/\xc5[\xa8-\xb3]/n,                 'u'],
    [/\xc5[\xb4-\xb5]/n,                 'w'],
    [/\xc5[\xb6-\xb8]/n,                 'y'],
    [/\xc5[\xb9-\xbe]/n,                 'z'],

    # denormalized diacritics
    [/\xcc[\x80-\xff]|\xcd[\x80-\xaf]/n, ''],
  ].freeze

  #
  # Transliterate UTF-8 text to plain ASCII and collapse every run of
  # non-word characters into a single '-'.
  #
  # The input is never modified: the work is done on a copy, so frozen
  # strings are accepted as well.
  #
  # @param name [String] text to convert; defaults to the receiver, so both
  #   "text".asciize and some_string.asciize("text") work
  # @return [String] a new string holding only ASCII word characters and '-'
  #
  def asciize(name = self)
    text = name.dup
    # Match on raw bytes. String#force_encoding is absent on Ruby 1.8, where
    # strings are already plain byte arrays.
    encoding_aware = text.respond_to?(:force_encoding)
    text.force_encoding('BINARY') if encoding_aware

    if text =~ /[^\x00-\x7F]/n
      ASCIIZE_RULES.each { |pattern, replacement| text.gsub!(pattern, replacement) }
    end

    slug = text.gsub(/[^\w]+/n, '-')
    # Only ASCII bytes remain, so the caller's (ASCII-compatible) encoding is
    # valid for the result again.
    slug.force_encoding(name.encoding) if encoding_aware
    slug
  end

end

if __FILE__ == $PROGRAM_NAME
  i18n = "I\xc3\xb1t\xc3\xabrn\xc3\xa2ti\xc3\xb4n\xc3\xa0liz\xc3\xa6ti\xc3\xb8n"
  puts "#{i18n} => #{i18n.asciize}"
end
71
+
72
+ # http://www.jroller.com/obie/tags/unicode
73
+ #
74
+ # require 'iconv'
75
+ # require 'unicode'
76
+ #
77
+ # class String
78
+ #
79
+ # def to_ascii
80
+ # # split in muti-byte aware fashion and translate characters over 127
81
+ # # and dropping characters not in the translation hash
82
+ # self.chars.split('').collect { |c| (c[0] <= 127) ? c : translation_hash[c[0]] }.join
83
+ # end
84
+ #
85
+ # def to_url_format
86
+ # url_format = self.to_ascii
87
+ # url_format = url_format.gsub(/[^A-Za-z0-9]/, '') # all non-word
88
+ # url_format.downcase!
89
+ # url_format
90
+ # end
91
+ #
92
+ # protected
93
+ #
94
+ # def translation_hash
95
+ # @@translation_hash ||= setup_translation_hash
96
+ # end
97
+ #
98
+ # def setup_translation_hash
99
+ # accented_chars = "ÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöøùúûüý"
100
+ # unaccented_chars = "AAAAAACEEEEIIIIDNOOOOOxOUUUUYaaaaaaceeeeiiiinoooooouuuuy"
101
+ #
102
+ # translation_hash = Hash.zip(accented_chars.chars, unaccented_chars.chars)
103
+ # translation_hash["Æ".chars[0]] = 'AE'
104
+ # translation_hash["æ".chars[0]] = 'ae'
105
+ # translation_hash
106
+ # end
107
+ #
108
+ # end
@@ -1,23 +1,31 @@
1
1
  require 'time'
2
2
  require 'date'
3
- DateTime.class_eval do
3
+
4
+ class Time
5
+ # strftime() format to flatten a date
6
+ FLAT_FORMAT = "%Y%m%d%H%M%S"
7
+ # Flatten
8
+ def to_flat
9
+ utc.strftime(FLAT_FORMAT)
10
+ end
11
+
4
12
  #
5
13
  # Parses the time but never fails.
6
14
  # Return value is always in the UTC time zone.
7
15
  #
8
- # A flattened datetime -- a 12-digit YYYYmmddHHMMMSS -- is fixed to the UTC
16
+ # A flattened datetime -- a 14-digit YYYYmmddHHMMSS -- is fixed to the UTC
9
17
  # time zone by parsing it as YYYYmmddHHMMSSZ <- 'Z' at end
10
18
  #
11
19
  def self.parse_safely dt
12
20
  return nil if dt.blank?
13
21
  begin
14
- if dt.to_s =~ /\A\d{12}Z?\z/
15
- parse(dt+'Z', true)
16
- else
17
- parse(dt, true).utc
22
+ case
23
+ when dt.is_a?(Time) then dt.utc
24
+ when (dt.to_s =~ /\A\d{14}\z/) then parse(dt.to_s+'Z', true)
25
+ else parse(dt.to_s, true).utc
18
26
  end
19
27
  rescue StandardError => e
20
- Log.info e
28
+ Log.debug e
21
29
  end
22
30
  end
23
31
 
@@ -25,3 +33,21 @@ DateTime.class_eval do
25
33
  parse_safely(str).to_flat
26
34
  end
27
35
  end
36
+
37
# Adds a compact, sortable 14-digit representation to DateTime.
class DateTime < Date
  # strftime() format to flatten a date-time: YYYYmmddHHMMSS.
  # Frozen so the shared constant cannot be altered in place.
  FLAT_FORMAT = "%Y%m%d%H%M%S".freeze

  # Flatten to a 14-digit string.
  #
  # The receiver is formatted in its own offset; no time-zone conversion is
  # applied by this method.
  #
  # @return [String] for example "20100808123456"
  def to_flat
    strftime(FLAT_FORMAT)
  end
end
45
+
46
# Adds a compact, sortable 8-digit representation to Date.
class Date
  # strftime() format to flatten a date: YYYYmmdd.
  # Frozen so the shared constant cannot be altered in place.
  FLAT_FORMAT = "%Y%m%d".freeze

  # Flatten to an 8-digit string.
  #
  # @return [String] for example "20100808"
  def to_flat
    strftime(FLAT_FORMAT)
  end
end