cmess 0.1.0.281 → 0.1.1.283

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
data/lib/cmess.rb CHANGED
@@ -36,6 +36,8 @@
36
36
  # convert them back -- this is where cinderella comes in,
37
37
  # sorting the good ones into the pot and the (potentially)
38
38
  # bad ones into the crop... (see Cinderella)
39
+ # bconv:: Convert between bibliographic (and other) encodings.
40
+ # (see BConv)
39
41
  # decode_entities:: Decode HTML entities in a string. (see DecodeEntities)
40
42
 
41
43
  module CMess
@@ -0,0 +1,98 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # A component of cmess, the encoding tool-box. #
5
+ # #
6
+ # Copyright (C) 2008 University of Cologne, #
7
+ # Albertus-Magnus-Platz, #
8
+ # 50932 Cologne, Germany #
9
+ # #
10
+ # Authors: #
11
+ # Jens Wille <jens.wille@uni-koeln.de> #
12
+ # #
13
+ # cmess is free software; you can redistribute it and/or modify it under the #
14
+ # terms of the GNU General Public License as published by the Free Software #
15
+ # Foundation; either version 3 of the License, or (at your option) any later #
16
+ # version. #
17
+ # #
18
+ # cmess is distributed in the hope that it will be useful, but WITHOUT ANY #
19
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
20
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
21
+ # details. #
22
+ # #
23
+ # You should have received a copy of the GNU General Public License along #
24
+ # with cmess. If not, see <http://www.gnu.org/licenses/>. #
25
+ # #
26
+ ###############################################################################
27
+ #++
28
+
29
+ require 'iconv'
30
+ require 'cmess'
31
+
32
+ # Convert between bibliographic (and other) encodings.
33
+
34
+ module CMess::BConv
35
+
36
+ extend self
37
+
38
+ # our version ;-)
39
+ VERSION = '0.0.1'
40
+
41
+ INTERMEDIATE_ENCODING = 'utf-8'
42
+
43
+ def encodings(chartab)
44
+ chartab[chartab.keys.first].keys.map { |encoding|
45
+ encoding.upcase unless encoding =~ /\A__/
46
+ }.compact.sort
47
+ end
48
+
49
+ def convert(input, output, source_encoding, target_encoding, chartab)
50
+ source_encoding.upcase!
51
+ target_encoding.upcase!
52
+
53
+ encodings = self.encodings(chartab)
54
+
55
+ if encodings.include?(source_encoding)
56
+ if encodings.include?(target_encoding)
57
+ charmap = chartab.inject({}) { |hash, (code, map)|
58
+ hash.update(map[source_encoding] => map[target_encoding].pack('U*'))
59
+ }
60
+
61
+ input.each_byte { |byte|
62
+ output.print charmap[[byte]] || charmap[[byte, input.getc]]
63
+ }
64
+ else
65
+ iconv = Iconv.new(target_encoding, INTERMEDIATE_ENCODING)
66
+
67
+ charmap = chartab.inject({}) { |hash, (code, map)|
68
+ hash.update(map[source_encoding] => [code.to_i(16)].pack('U*'))
69
+ }
70
+
71
+ input.each_byte { |byte|
72
+ output.print iconv.iconv(charmap[[byte]] || charmap[[byte, input.getc]])
73
+ }
74
+ end
75
+ else
76
+ if encodings.include?(target_encoding)
77
+ iconv = Iconv.new(INTERMEDIATE_ENCODING, source_encoding)
78
+
79
+ charmap = chartab.inject({}) { |hash, (code, map)|
80
+ hash.update(code.to_i(16) => map[target_encoding].pack('U*'))
81
+ }
82
+
83
+ input.each { |line|
84
+ iconv.iconv(line).unpack('U*').each { |byte|
85
+ output.print charmap[byte]
86
+ }
87
+ }
88
+ else
89
+ iconv = Iconv.new(target_encoding, source_encoding)
90
+
91
+ input.each { |line|
92
+ output.puts iconv.iconv(line)
93
+ }
94
+ end
95
+ end
96
+ end
97
+
98
+ end
data/lib/cmess/cli.rb CHANGED
@@ -28,52 +28,54 @@
28
28
 
29
29
  module CMess::CLI
30
30
 
31
- def ensure_readable(file)
32
- abort "Can't find input file: #{file}" unless File.readable?(file)
33
- end
31
+ DATA_DIR = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'data'))
34
32
 
35
- def ensure_directory(dir)
36
- abort "Directory not found: #{dir}" unless File.directory?(dir)
37
- end
33
+ def ensure_readable(file)
34
+ abort "Can't find input file: #{file}" unless File.readable?(file)
35
+ end
38
36
 
39
- def open_file_in_place(file)
40
- ensure_readable(file)
41
- [File.readlines(file), File.open(file, 'w')]
42
- end
37
+ def ensure_directory(dir)
38
+ abort "Directory not found: #{dir}" unless File.directory?(dir)
39
+ end
40
+
41
+ def open_file_in_place(file)
42
+ ensure_readable(file)
43
+ [File.readlines(file), File.open(file, 'w')]
44
+ end
43
45
 
44
- def open_file_or_std(file, mode = 'r')
45
- if file == '-'
46
- case mode
47
- when 'r': STDIN
48
- when 'w': STDOUT
49
- when 'a': STDERR
50
- else raise ArgumentError, "don't know how to handle mode '#{mode}'"
51
- end
52
- else
53
- ensure_readable(file) unless mode == 'w'
54
- File.open(file, mode)
46
+ def open_file_or_std(file, mode = 'r')
47
+ if file == '-'
48
+ case mode
49
+ when 'r': STDIN
50
+ when 'w': STDOUT
51
+ when 'a': STDERR
52
+ else raise ArgumentError, "don't know how to handle mode '#{mode}'"
55
53
  end
54
+ else
55
+ ensure_readable(file) unless mode == 'w'
56
+ File.open(file, mode)
56
57
  end
58
+ end
57
59
 
58
- def determine_system_encoding
59
- ENV['SYSTEM_ENCODING'] ||
60
- ENV['LANG'][/\.(.*)/, 1] ||
61
- system_encoding_not_found
62
- end
60
+ def determine_system_encoding
61
+ ENV['SYSTEM_ENCODING'] ||
62
+ ENV['LANG'][/\.(.*)/, 1] ||
63
+ system_encoding_not_found
64
+ end
63
65
 
64
- def system_encoding_not_found
65
- not_found = lambda {
66
- abort <<-EOT
66
+ def system_encoding_not_found
67
+ not_found = lambda {
68
+ abort <<-EOT
67
69
  Your system's encoding couldn't be determined automatically -- please specify it
68
70
  explicitly via the SYSTEM_ENCODING environment variable or via the '-t' option.
69
- EOT
70
- }
71
+ EOT
72
+ }
71
73
 
72
- def not_found.to_s
73
- 'NOT FOUND'
74
- end
75
-
76
- not_found
74
+ def not_found.to_s
75
+ 'NOT FOUND'
77
76
  end
78
77
 
78
+ not_found
79
79
  end
80
+
81
+ end
data/lib/cmess/version.rb CHANGED
@@ -30,7 +30,7 @@ module CMess::Version
30
30
 
31
31
  MAJOR = 0
32
32
  MINOR = 1
33
- TINY = 0
33
+ TINY = 1
34
34
 
35
35
  class << self
36
36
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cmess
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0.281
4
+ version: 0.1.1.283
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jens Wille
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-09-15 00:00:00 +02:00
12
+ date: 2008-09-16 00:00:00 +02:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -32,10 +32,11 @@ dependencies:
32
32
  - !ruby/object:Gem::Version
33
33
  version: "0"
34
34
  version:
35
- description: "Assist with handling messed up encodings (Currently includes the following tools: cinderella, decode_entities, guess_encoding)"
35
+ description: "Assist with handling messed up encodings (Currently includes the following tools: bconv, cinderella, decode_entities, guess_encoding)"
36
36
  email: jens.wille@uni-koeln.de
37
37
  executables:
38
38
  - cinderella
39
+ - bconv
39
40
  - decode_entities
40
41
  - guess_encoding
41
42
  extensions: []
@@ -46,6 +47,7 @@ extra_rdoc_files:
46
47
  - README
47
48
  files:
48
49
  - lib/cmess.rb
50
+ - lib/cmess/bconv.rb
49
51
  - lib/cmess/version.rb
50
52
  - lib/cmess/guess_encoding.rb
51
53
  - lib/cmess/cli.rb
@@ -55,6 +57,7 @@ files:
55
57
  - lib/cmess/guess_encoding/encoding.rb
56
58
  - lib/cmess/guess_encoding/automatic.rb
57
59
  - bin/cinderella
60
+ - bin/bconv
58
61
  - bin/decode_entities
59
62
  - bin/guess_encoding
60
63
  - COPYING
@@ -94,6 +97,7 @@ files:
94
97
  - data/csets/unicode/basic_latin.yaml
95
98
  - data/csets/unicode/cyrillic.yaml
96
99
  - data/test_chars.yaml
100
+ - data/chartab.yaml
97
101
  has_rdoc: true
98
102
  homepage: http://prometheus.rubyforge.org/cmess
99
103
  post_install_message:
@@ -127,6 +131,6 @@ rubyforge_project: prometheus
127
131
  rubygems_version: 1.2.0
128
132
  signing_key:
129
133
  specification_version: 2
130
- summary: "Assist with handling messed up encodings (Currently includes the following tools: cinderella, decode_entities, guess_encoding)"
134
+ summary: "Assist with handling messed up encodings (Currently includes the following tools: bconv, cinderella, decode_entities, guess_encoding)"
131
135
  test_files: []
132
136