huffman 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: b1ca0edfc95c127d985761e93aef3955db7b072c
4
+ data.tar.gz: 158a85243dd28bd5e091324ea3f67a5d9f38f729
5
+ SHA512:
6
+ metadata.gz: 470b5e0b2c8c63c5bc7f99c70c020e2716e5f1be2826d6d9af595d798d59d13e77b23945f7f9972dd01843a492b47a650c064419f5b899e1f1e93486204ee1cd
7
+ data.tar.gz: 7c57e085a01a7a5c138f291ab94602c2570a6ff3c8f4f46f496ea5d71ecb537d51b1fd9e484327117e59e855f6160fad08199ee64be41b07771ccc7f5c623fa9
Binary file
@@ -0,0 +1,22 @@
1
+ # Auto detect text files and perform LF normalization
2
+ * text=auto
3
+
4
+ # Custom for Visual Studio
5
+ *.cs diff=csharp
6
+ *.sln merge=union
7
+ *.csproj merge=union
8
+ *.vbproj merge=union
9
+ *.fsproj merge=union
10
+ *.dbproj merge=union
11
+
12
+ # Standard to msysgit
13
+ *.doc diff=astextplain
14
+ *.DOC diff=astextplain
15
+ *.docx diff=astextplain
16
+ *.DOCX diff=astextplain
17
+ *.dot diff=astextplain
18
+ *.DOT diff=astextplain
19
+ *.pdf diff=astextplain
20
+ *.PDF diff=astextplain
21
+ *.rtf diff=astextplain
22
+ *.RTF diff=astextplain
@@ -0,0 +1,19 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ huffman.log
19
+ data/.*
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format progress
@@ -0,0 +1 @@
1
+ 2.0.0-p247
@@ -0,0 +1,4 @@
1
+ rvm:
2
+ - 1.9.3
3
+ - 2.0.0
4
+ - 2.1.0
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in huffman.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Martin Lagrange
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,80 @@
1
+ ![Huffman](http://www.mathworks.com/matlabcentral/fx_files/33212/1/huffman.png)
2
+
3
+ [![Code Climate](https://codeclimate.com/repos/52c06ecae30ba036eb00347b/badges/adbbcd5f91fb73caf186/gpa.png)](https://codeclimate.com/repos/52c06ecae30ba036eb00347b/feed) [![Build Status](https://travis-ci.org/lagrangemartin/huffman.png?branch=master)](https://travis-ci.org/lagrangemartin/huffman)
4
+
5
+ # The Huffman gem
6
+
7
+ This gem allows you to encode and decode text using the Huffman compression algorithm.
8
+ It can also generate and visualize the huffman tree as a png file.
9
+
10
+ ## Setup
11
+
12
+ -Add this line to your application's Gemfile:
13
+ Ajoutez cette ligne à votre Gemfile :
14
+
15
+ ```ruby
16
+ gem 'huffman'
17
+ ```
18
+
19
+ -And then execute:
20
+
21
+ $ bundle
22
+
23
+ -Or install it yourself as:
24
+
25
+ $ gem install huffman
26
+
27
+
28
+ -Install graphviz if you want to be able to visualize trees.
29
+
30
+ ## Usage
31
+
32
+
33
+ Encode a text :
34
+
35
+ ```ruby
36
+ Huffman.encode_text(txt) # return text_encoded, dictionnary
37
+ Huffman.encode_text(txt) # return text_encoded, dictionnary
38
+ ```
39
+
40
+
41
+ Decode a text :
42
+ ```ruby
43
+ Huffman.decode_text(encoded_txt, dictionnary) # return decoded_text
44
+ ```
45
+
46
+ Encode a text file
47
+ ```ruby
48
+ Huffman.encode_file(file_name) # write a file_name.huffman-encoded file and a file_name.huffman-dictionnary
49
+ Huffman.encode_file(file_name) # write a file_name.huffman-encoded file and a file_name.huffman-dictionnary
50
+ ```
51
+
52
+ Decode a text file :
53
+ ```ruby
54
+ Huffman.decode_file(huffman_encoded_file_path, huffman_dictionnary_path) # writes a <huffman_encoded_file_path>-back-to-original file
55
+ ```
56
+
57
+
58
+ ## Generate a Huffman tree
59
+
60
+ ![Huffman tree](http://img15.hostingpics.net/pics/575352tree.png)
61
+
62
+
63
+ This gem gives you the ability to generate and visualize the Huffman tree.
64
+ You need to install Graphviz first.
65
+
66
+ All you need to do is pass the optional tree options to the encode_text or encode_file method:
67
+
68
+
69
+ ```ruby
70
+ Huffman.encode_text(txt, tree_picture: true, tree_path: "my_trees/the_tree") # or
71
+ Huffman.encode_file(file_name, tree_picture: true, tree_path: "my_trees/the_tree")
72
+ ```
73
+
74
+ ## Contributing
75
+
76
+ 1. Fork it
77
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
78
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
79
+ 4. Push to the branch (`git push origin my-new-feature`)
80
+ 5. Create new Pull Request
@@ -0,0 +1,13 @@
1
# Rake entry point: pulls in Bundler's gem tasks and the RSpec rake task.
require "bundler/gem_tasks"
require "rspec/core/rake_task"

# Registers the RSpec rake task under its default name (:spec).
RSpec::Core::RakeTask.new

# Both `rake` and `rake test` run the spec suite.
task :default => :spec
task :test => :spec

desc "Open an irb session preloaded with this library"
task :console do
  # The former `-rubygems` switch was dropped: it expands to `-r ubygems`,
  # a shim that no longer ships with current Rubies, and RubyGems has been
  # loaded automatically since Ruby 1.9.
  sh "irb -I lib -r huffman.rb"
end
@@ -0,0 +1,29 @@
1
# coding: utf-8
# Gem specification for the huffman gem.

# Put ./lib on the load path so the version file can be required below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'huffman/version'

Gem::Specification.new do |spec|
  # Identity and descriptive metadata.
  spec.name = "huffman"
  spec.version = Huffman::VERSION
  spec.authors = ["Martin Lagrange", "Bastien Jorge"]
  spec.email = ["lagrangemartin@gmail.com", "bastien.jorge@gmail.com"]
  spec.summary = %q{Encode and decode a text using the Huffman encoding compression algorithm.}
  spec.description = %q{This gem allow you to encode and decode a text using the Huffman encoding compression algorithm. It can also generate and visualize the huffman tree as a png file.}
  spec.homepage = "http://github.com/lagrangemartin/huffman"
  spec.license = "MIT"

  # Packaged files are whatever `git ls-files` prints, so building the gem
  # shells out to git; executables and test files are filtered from that list.
  spec.files = `git ls-files`.split($/)
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Development-only tooling.
  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec"

  # Runtime dependencies (no version constraints are declared).
  spec.add_dependency "activesupport"
  spec.add_dependency "ruby-graphviz"
  spec.add_dependency "PriorityQueue"

end
Binary file
@@ -0,0 +1,79 @@
1
+ # encoding: utf-8
2
+ require 'active_support/all'
3
+ require "huffman/version"
4
+ require "huffman/letter_frequency"
5
+ require "huffman/binary_stream"
6
+ require "huffman/node"
7
+ require "huffman/tree"
8
+ require "huffman/log"
9
+
10
+
11
# Public entry points of the gem: encode/decode strings and files with
# Huffman coding.
module Huffman
  # End-of-transmission character (ASCII 3). encode_file appends it to the
  # text so the decoder can stop before the zero bits that pad the last byte.
  EOT = 3.chr

  extend self

  # Huffman-encodes a string.
  #
  # @param txt [String] the text to encode
  # @param options [Hash] optional settings (the hash is not modified):
  #   :tree_picture - when truthy, the tree is rendered as a PNG (default false)
  #   :tree_path    - PNG path without the ".png" extension (default "tree")
  # @return [Array] the encoded text as a string of "0"/"1" characters,
  #   followed by the dictionary returned by Tree#dictionnary
  def encode_text(txt, options={})
    # Read the options locally instead of writing the defaults back into the
    # caller's hash.
    tree_picture = options[:tree_picture] || false
    tree_path = options[:tree_path] || "tree"

    frequencies = LetterFrequency.get_frequencies(txt)
    tree = Tree.new(frequencies)
    dictionnary = tree.dictionnary
    tree.display_as_png(tree_path) if tree_picture
    encoded_text = BinaryStream.get_bits_from_text(txt, dictionnary)
    return encoded_text, dictionnary
  end

  # Decodes a string of "0"/"1" characters with the given dictionary.
  #
  # @param encoded_text [String] bits as produced by encode_text
  # @param dictionnary [Hash] bit code => symbol
  # @return [String] the decoded text
  def decode_text(encoded_text, dictionnary)
    BinaryStream.get_text_from_bits(encoded_text, dictionnary)
  end

  # Encodes a text file. Writes "<file_path>.huffman-encoded" (the packed
  # bits) and "<file_path>.huffman-dictionnary" (one code, a TAB and one
  # symbol per entry, with no separator between entries).
  #
  # @param file_path [String] path of the file to encode
  # @param options [Hash] same keys as encode_text; :tree_path defaults to
  #   file_path. The hash is not modified.
  # @return [nil]
  def encode_file(file_path, options = {})
    # Work on a copy so the caller's hash is left untouched.
    options = options.merge(:tree_path => options[:tree_path] || file_path)
    txt = File.read(file_path).encode('UTF-8', :invalid => :replace)
    # Append the EOT (end of transmission) marker.
    txt = txt + EOT
    encoded_text, dictionnary = encode_text(txt, options)

    # pack("B*") fills the last byte with zero bits; the EOT marker is what
    # lets the decoder ignore them.
    encoded_file_name = file_path + ".huffman-encoded"
    File.open(encoded_file_name, 'wb') { |f| f.write [encoded_text].pack("B*") }

    dictionnary_stream = dictionnary.map { |bin, char| bin + "\t" + char }.join('')
    dictionnary_file_name = file_path + ".huffman-dictionnary"
    File.open(dictionnary_file_name, 'wb') { |f| f.write dictionnary_stream }

    nil
  end

  # Decodes a file written by encode_file and writes the result to
  # "<file_path>-back-to-original".
  #
  # @param file_path [String] path of the ".huffman-encoded" file
  # @param dictionnary_file_path [String] path of the matching dictionary file
  # @return [nil]
  def decode_file(file_path, dictionnary_file_path)
    dictionnary = read_dictionnary(dictionnary_file_path)
    # The encoded file is raw bytes, so it is read in binary mode.
    encoded_text = File.binread(file_path).unpack("B*").join
    original_text = decode_text(encoded_text, dictionnary)
    File.open(file_path + "-back-to-original", 'wb') { |f| f.write original_text }
    nil
  end

  private

  # Parses a dictionary file into a Hash of bit code => symbol.
  def read_dictionnary(path)
    dictionnary = {}
    bits_buffer = ''
    next_char_is_the_symbol = false

    File.open(path, 'rb:UTF-8') { |f| f.read }.each_char do |c|
      # The symbol test must come first: the symbol itself may be a TAB, and
      # treating it as a separator would drop it and shift every later entry.
      if next_char_is_the_symbol
        dictionnary[bits_buffer] = c
        bits_buffer = ''
        next_char_is_the_symbol = false
      elsif c == "\t"
        next_char_is_the_symbol = true
      else
        bits_buffer += c
      end
    end

    dictionnary
  end
end
@@ -0,0 +1,26 @@
1
+ module Huffman
2
+ module BinaryStream
3
+ extend self
4
+
5
+ def get_bits_from_text(txt,dictionnary)
6
+ dictionnary = dictionnary.invert
7
+ txt.each_char.map{|char| ; dictionnary[char]}.join
8
+ end
9
+
10
+ def get_text_from_bits(bits,dictionnary)
11
+ original_text = '' ; buffer = ''
12
+ bits.each_char do |bit|
13
+ buffer += bit
14
+ # Si il y'a une correspondance
15
+ if dictionnary[buffer]
16
+ # Si c'est le marqueur de fin EOF
17
+ return original_text if dictionnary[buffer] == EOT
18
+
19
+ original_text += dictionnary[buffer]
20
+ buffer.clear
21
+ end
22
+ end
23
+ original_text
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,11 @@
1
module Huffman
  # Character statistics used to build the Huffman tree.
  module LetterFrequency
    extend self

    # Counts the occurrences of every character of txt.
    #
    # @param txt [String] text to analyse
    # @return [Hash] character => number of occurrences, in order of first
    #   appearance; looking up an absent character returns 0
    def get_frequencies(txt)
      txt.each_char.each_with_object(Hash.new(0)) do |letter, counts|
        counts[letter] += 1
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
+ require "logger"
2
module Huffman
  extend self

  # Single shared Logger writing to "huffman.log" in the current working
  # directory. Instances are obtained through Log.instance; `new` is private.
  class Log < Logger
    private_class_method :new

    # Returns the shared logger, creating and configuring it on first use.
    # The logger lives in a class-level instance variable and is configured
    # only once, rather than being reconfigured on every call.
    #
    # @return [Huffman::Log]
    def self.instance
      @logger ||= build
    end

    # Creates the logger and installs the "<timestamp>: <message>" format.
    def self.build
      logger = new 'huffman.log'
      logger.datetime_format = "%d/%m %H:%M:%S:%6N"
      logger.formatter = proc do |_severity, datetime, _progname, msg|
        "#{datetime.strftime(logger.datetime_format)}: #{msg}\n"
      end
      logger
    end
    private_class_method :build
  end

  # Shortcut to the shared logger.
  #
  # @return [Huffman::Log]
  def log
    Log.instance
  end
end
@@ -0,0 +1,63 @@
1
module Huffman
  # A node of the Huffman tree. It stores a weight (value), an optional
  # symbol, its two children and the bit code assigned by set_binary_values.
  class Node
    attr_accessor :value, :binary_value, :symbol, :left, :right

    # Traversal orders accepted by #visit.
    ORDERS = [:preorder, :inorder, :postorder].freeze

    # @param value [Object] weight of the node; must not be nil
    # @param symbol [Object, nil] symbol carried by the node
    # @param left [Node, nil] child reached with bit 0
    # @param right [Node, nil] child reached with bit 1
    # @raise [ArgumentError] when value is nil (or false)
    def initialize(value=nil, symbol=nil, left=nil, right=nil)
      raise ArgumentError, "The value of the node cannot be nil" unless value
      @value, @symbol, @left, @right, @binary_value = value, symbol, left, right, ''
    end

    # Walks the subtree rooted at this node and yields every node to the
    # block, e.g. `visit(:inorder) { |node| puts node.value }`.
    #
    # @param order [Symbol] :preorder, :inorder or :postorder
    # @return [Enumerator] over the visited nodes when no block is given
    # @raise [ArgumentError] when order is not one of the three above
    def visit(order=:preorder, &block)
      raise ArgumentError, "Wrong order" unless ORDERS.include?(order)
      # Without a block, hand back an Enumerator instead of failing on the
      # first yield.
      return enum_for(:visit, order) unless block

      case order
      when :preorder
        yield self
        @left.visit(order, &block) if @left
        @right.visit(order, &block) if @right
      when :inorder
        @left.visit(order, &block) if @left
        yield self
        @right.visit(order, &block) if @right
      when :postorder
        @left.visit(order, &block) if @left
        @right.visit(order, &block) if @right
        yield self
      end
    end

    # Calls the block once per visited node and collects the results.
    #
    # @param order [Symbol] see #visit
    # @return [Array] the block's return values, in visiting order
    def visit_and_map(order=:preorder, &block)
      results = []
      visit(order) { |node| results << yield(node) }
      results
    end

    # Gives every descendant its Huffman code: the parent's code followed by
    # "0" for a left child or "1" for a right child. When a block is given,
    # each descendant is yielded right after receiving its code and before
    # its own children are processed.
    def set_binary_values(&block)
      [@left, @right].each_with_index do |child, bit|
        next unless child

        child.binary_value = @binary_value + bit.to_s
        yield child if block_given?
        child.set_binary_values(&block)
      end
    end

    # @return [Boolean] true when the node has no child
    def leaf?
      !@left && !@right
    end
  end
end
@@ -0,0 +1,83 @@
1
+ #coding: utf-8
2
+ require 'priority_queue'
3
+ require 'graphviz'
4
+
5
+
6
module Huffman
  # Huffman tree built from a table of symbol frequencies.
  class Tree

    # Traversal methods are delegated to the root node.
    delegate :visit,
             :visit_and_map,
             :set_binary_values,
             :to => :@root

    # Builds the tree and assigns a bit code to every node.
    #
    # @param frequencies [Hash] symbol => number of occurrences
    # @raise [ArgumentError] when frequencies is empty
    def initialize(frequencies)
      # An empty table used to fail further down with a NoMethodError on nil.
      raise ArgumentError, "frequencies cannot be empty" if frequencies.empty?

      # Queue of nodes keyed by weight; the tree is built by repeatedly
      # taking the two lightest nodes out of it.
      nodes = PriorityQueue.new
      frequencies.each { |symbol, count| nodes.push(Node.new(count, symbol), count) }

      # Until a single node (the root) is left in the queue:
      until nodes.length == 1
        # 1) remove the two lightest nodes and make them the children of a
        #    new parent whose value is the sum of theirs
        node1, node2 = nodes.delete_min.first, nodes.delete_min.first
        parent = Node.new(node1.value + node2.value, nil, node1, node2)
        # 2) put the parent back into the queue
        nodes.push(parent, parent.value)
      end
      @root = nodes.delete_min.first
      set_binary_values
      # With a single distinct symbol the root is itself the only leaf and
      # would keep the empty code, so every character would encode to nothing
      # and the text could not be decoded. Give it a one-bit code instead.
      @root.binary_value = "0" if @root.leaf?
    end

    # Returns the lookup table used for encoding and decoding.
    #
    # @return [Hash] bit code => symbol, for every node that carries a symbol
    def dictionnary
      codes = {}
      # The codes were already assigned in the constructor, so a plain
      # traversal is enough (no need to run set_binary_values again).
      visit(:postorder) { |node| codes[node.binary_value] = node.symbol if node.symbol }
      codes
    end

    # Renders the tree to "<path>.png" with GraphViz. Nodes carrying a symbol
    # are yellow, the others red.
    #
    # @param path [String] output path without the ".png" extension
    def display_as_png(path="tree")
      # Create a new graph
      g = GraphViz.new(:G)
      # Post-order: both children are already in the graph when the edges of
      # their parent are added.
      visit(:postorder) do |node|
        # A node without a symbol is a parent created while building the tree.
        color = node.symbol ? "yellow" : "red"

        label = "#{node.value}"
        label += "➠#{node.binary_value}" unless node.binary_value == ''
        label += "➡︎#{symbol_label(node.symbol)}" if node.symbol

        graphviz_node = g.add_nodes(node.__id__.to_s, label: label, "style" => "filled", "color" => color )
        # Add the edges towards its children.
        g.add_edges(graphviz_node, g.get_node(node.left.__id__.to_s)) if node.left
        g.add_edges(graphviz_node, g.get_node(node.right.__id__.to_s)) if node.right
      end
      g.output( :png => "#{path}.png" )
    end

    private

    # Printable name of a symbol; a few control or blank characters get a
    # readable label.
    def symbol_label(symbol)
      case symbol
      when EOT
        "EOT"
      when "\t"
        "TAB"
      when "\b"
        "BACKSPACE"
      when " "
        "WHITESPACE"
      when "\n"
        "LINE RETURN"
      else
        symbol.to_s
      end
    end
  end
end