RubyGems - trace_visualization - Versions diffs - 0.0.1 → 0.0.2 - Mend

trace_visualization 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

checksums.yaml +4 -4
data/Gemfile +2 -1
data/Rakefile +11 -1
data/lib/trace_visualization/assert.rb +5 -0
data/lib/trace_visualization/data/lexeme.rb +59 -0
data/lib/trace_visualization/lexeme_overlap_filter.rb +64 -0
data/lib/trace_visualization/mapping.rb +219 -56
data/lib/trace_visualization/preprocessor/Makefile +43 -0
data/lib/trace_visualization/preprocessor/hashmap.c +93 -0
data/lib/trace_visualization/preprocessor/hashmap.h +27 -0
data/lib/trace_visualization/preprocessor/hashmap_test.cpp +90 -0
data/lib/trace_visualization/preprocessor/lexeme.h +11 -0
data/lib/trace_visualization/preprocessor/lexeme_table.c +50 -0
data/lib/trace_visualization/preprocessor/lexeme_table.h +13 -0
data/lib/trace_visualization/preprocessor/lexeme_table_cpp.h +61 -0
data/lib/trace_visualization/preprocessor/parser_functions.c +42 -0
data/lib/trace_visualization/preprocessor/parser_test.cpp +71 -0
data/lib/trace_visualization/preprocessor/preprocessor.l +18 -0
data/lib/trace_visualization/preprocessor/test_main.cpp +39 -0
data/lib/trace_visualization/profile.rb +38 -0
data/lib/trace_visualization/reorder.rb +22 -11
data/lib/trace_visualization/repetitions_psy.rb +1 -1
data/lib/trace_visualization/suffix_array.rb +4 -0
data/lib/trace_visualization/utils.rb +38 -0
data/lib/trace_visualization/version.rb +1 -1
data/lib/trace_visualization/visualization/console_color_print.rb +4 -2
data/lib/trace_visualization.rb +68 -4
data/spec/bwt_spec.rb +29 -7
data/spec/lexeme_overlap_filter_spec.rb +59 -0
data/spec/longest_common_prefix_spec.rb +3 -3
data/spec/mapping_spec.rb +80 -34
data/spec/reorder_spec.rb +25 -7
data/spec/repetitions_psy_spec.rb +5 -5
data/spec/suffix_array_spec.rb +30 -8
data/spec/utils_spec.rb +30 -0
metadata +22 -3
data/LICENSE.txt +0 -22

data/lib/trace_visualization/preprocessor/hashmap_test.cpp ADDED Viewed

@@ -0,0 +1,90 @@
+#include <cppunit/extensions/HelperMacros.h>
+#include <iostream>
+#include "hashmap.h"
+using namespace std;
+// CppUnit fixture for the hashmap API declared in "hashmap.h".
+// The suite registers three cases: smoke_test, complex_test and
+// hashmap_values_test; their bodies are defined below the class.
+class hashmap_test : public CPPUNIT_NS::TestFixture {
+	CPPUNIT_TEST_SUITE(hashmap_test);
+	CPPUNIT_TEST(smoke_test);
+	CPPUNIT_TEST(complex_test);
+	CPPUNIT_TEST(hashmap_values_test);
+	CPPUNIT_TEST_SUITE_END();
+public:
+	void smoke_test(void);
+	void complex_test(void);
+	void hashmap_values_test(void);
+};
+CPPUNIT_TEST_SUITE_REGISTRATION(hashmap_test);
+// Stores one string under key 12345 and checks that only that key finds it.
+void hashmap_test::smoke_test() {
+	const char* value = "value";
+	hashmap_t* table = hashmap_new();
+	hashmap_put(table, 12345, (char*)value);
+	// A key that was never inserted must not resolve to anything.
+	void* ptr = hashmap_get(table, 1);
+	CPPUNIT_ASSERT(ptr == NULL);
+	// The inserted key must give back the stored string.
+	ptr = hashmap_get(table, 12345);
+	CPPUNIT_ASSERT(ptr != NULL);
+	CPPUNIT_ASSERT(strcmp((char*)ptr, value) == 0);
+	hashmap_free(table);
+}
+// Inserts keys 0, S/4, 2*S/4, ... below 8*S (S = HASHMAP_SIZE), each mapped to
+// a pointer holding the key itself, then checks both the raw table and lookups.
+void hashmap_test::complex_test() {
+	hashmap_t* map = hashmap_new();
+	CPPUNIT_ASSERT(map != NULL);
+	for (long i = 0; i < HASHMAP_SIZE << 3; i += HASHMAP_SIZE >> 2) {
+		hashmap_put(map, i, (void*)i);
+	}
+	// Peeks at map->table directly: only slots whose index is a multiple of
+	// S/4 are expected to be occupied.
+	// NOTE(review): presumably the bucket index is key % HASHMAP_SIZE - confirm in hashmap.c.
+	for (int i = 0; i < HASHMAP_SIZE; i++) {
+		if (i % (HASHMAP_SIZE >> 2) == 0) {
+			CPPUNIT_ASSERT(map->table[i] != NULL);
+		} else {
+			CPPUNIT_ASSERT(map->table[i] == NULL);
+		}
+	}
+	// Every inserted key returns its own value; the neighbouring keys
+	// (i + 1, i - 1, including -1 for i == 0) were never inserted.
+	for (long i = 0; i < HASHMAP_SIZE << 3; i += HASHMAP_SIZE >> 2) {
+		void* ptr = hashmap_get(map, i);
+		CPPUNIT_ASSERT(ptr == (void*)i);
+		CPPUNIT_ASSERT(hashmap_get(map, i + 1) == NULL);
+		CPPUNIT_ASSERT(hashmap_get(map, i - 1) == NULL);
+	}
+	hashmap_free(map);
+}
+// Fills the map with keys 0 .. HASHMAP_SIZE * k - 1 (value == key) and checks
+// the reported size and the order of the array returned by hashmap_values().
+void hashmap_test::hashmap_values_test() {
+	hashmap_t* map = hashmap_new();
+	CPPUNIT_ASSERT(map != NULL);
+	int k = 2;
+	for (long i = 0; i < HASHMAP_SIZE * k; i += 1) {
+		hashmap_put(map, i, (void*)i);
+	}
+	CPPUNIT_ASSERT(map->size == HASHMAP_SIZE * k);
+	void** values = hashmap_values(map);
+	CPPUNIT_ASSERT(values != NULL);
+	for (long i = 0; i < map->size; i++) {
+		void* value = values[i];
+		// The assertion expects values grouped k per slot: position i holds
+		// key (i / k) + (i % k) * HASHMAP_SIZE.
+		// Kept as long (like the keys above) so the cast to void* does not
+		// go through a narrower int.
+		long expected = (i % k) * HASHMAP_SIZE + i / k;
+		CPPUNIT_ASSERT(value == (void*)expected);
+	}
+	// Releases only the array; the stored "pointers" are plain integers.
+	hashmap_values_free(values);
+	hashmap_free(map);
+}

data/lib/trace_visualization/preprocessor/lexeme.h ADDED Viewed

@@ -0,0 +1,11 @@
+#ifndef __LEXEME_H__
+#define __LEXEME_H__
+/* One recognised token of the trace. */
+struct lexeme_t {
+	const char* name;     /* kind label passed by the caller, e.g. "ID", "IP", "TIME"; pointer is stored, not copied */
+	const char* source;   /* matched text the lexeme came from; pointer is stored, not copied */
+	long        numeric;  /* numeric value of the token; also the key in the lexeme table */
+	int         ord;      /* rank assigned by reorder_lexemes() / lexeme_table::reorder() */
+};
+/* lexeme_table.c and lexeme_table.h name the type as plain `lexeme_t`.
+ * C needs this typedef for that; in C++ it is a harmless redeclaration. */
+typedef struct lexeme_t lexeme_t;
+#endif

data/lib/trace_visualization/preprocessor/lexeme_table.c ADDED Viewed

@@ -0,0 +1,50 @@
+#include "lexeme_table.h"
+#include <stdlib.h>
+/* Creates an empty lexeme table; the table is just a hashmap from hashmap_new(). */
+hashmap_t* lexeme_table_new() {
+	return hashmap_new();
+}
+/*
+ * Returns the lexeme stored under `numeric`, creating and registering it
+ * first when the table has no entry for that key.
+ *
+ * `name` and `source` are stored as given (not copied) and are only used
+ * when a new lexeme is created; an existing entry is returned unchanged.
+ * Returns NULL when a new lexeme cannot be allocated.
+ */
+lexeme_t* install_lexeme(hashmap_t* lexeme_table, const char* name, const char* source, long numeric) {
+	void* ptr = hashmap_get(lexeme_table, numeric);
+	if (ptr != NULL) {
+		return (lexeme_t*)ptr;
+	}
+	lexeme_t* lexeme = (lexeme_t*)malloc(sizeof(lexeme_t));
+	if (lexeme == NULL) {
+		return NULL;
+	}
+	lexeme->name = name;
+	lexeme->source = source;
+	lexeme->numeric = numeric;
+	/* Not ranked yet; reorder_lexemes() assigns the real value. */
+	lexeme->ord = 0;
+	hashmap_put(lexeme_table, numeric, lexeme);
+	return lexeme;
+}
+/*
+ * qsort() comparator for an array of lexeme_t* ordered by `numeric`.
+ * Compares instead of subtracting: the difference of two longs does not
+ * fit in the int return value and could report the wrong sign.
+ */
+int compare_lexeme(const void* a, const void* b) {
+	long lhs = ((lexeme_t*)*(void**)a)->numeric;
+	long rhs = ((lexeme_t*)*(void**)b)->numeric;
+	return (lhs > rhs) - (lhs < rhs);
+}
+/*
+ * Assigns each lexeme in the table a dense rank (`ord`, starting at 1)
+ * following the ascending order of `numeric`; equal values share a rank.
+ */
+void reorder_lexemes(hashmap_t* lexeme_table) {
+	void** values = hashmap_values(lexeme_table);
+	if (values == NULL) {
+		return;
+	}
+	qsort(values, lexeme_table->size, sizeof(void*), compare_lexeme);
+	int  ord  = 0;
+	long prev = 0;
+	for (int i = 0; i < lexeme_table->size; i++) {
+		lexeme_t* lexeme = (lexeme_t*)values[i];
+		/* The first element always opens a rank, so no sentinel value of
+		 * `prev` can collide with a real numeric (e.g. -1). */
+		if (i == 0 || lexeme->numeric != prev) {
+			ord += 1;
+			prev = lexeme->numeric;
+		}
+		lexeme->ord = ord;
+	}
+	/* Release the temporary array; the lexemes stay owned by the table. */
+	hashmap_values_free(values);
+}

data/lib/trace_visualization/preprocessor/lexeme_table.h ADDED Viewed

@@ -0,0 +1,13 @@
+#ifndef __LEXEME_TABLE_H__
+#define __LEXEME_TABLE_H__
+#include "lexeme.h"
+#include "hashmap.h"
+/* Creates an empty lexeme table. */
+extern hashmap_t* lexeme_table_new();
+/* Returns the lexeme keyed by `numeric`, creating it from name/source if absent. */
+extern lexeme_t* install_lexeme(hashmap_t* lexeme_table, const char* name, const char* source, long numeric);
+/* Assigns the `ord` field of every lexeme according to ascending `numeric`. */
+extern void reorder_lexemes(hashmap_t* lexeme_table);
+#endif

data/lib/trace_visualization/preprocessor/lexeme_table_cpp.h ADDED Viewed

@@ -0,0 +1,61 @@
+#ifndef __LEXEME_TABLE_H__
+#define __LEXEME_TABLE_H__
+#include <map>
+#include <vector>
+#include <algorithm>
+#include "lexeme.h"
+// std::map based lexeme table: owns one lexeme_t per distinct numeric value
+// and can rank the stored lexemes by that value.
+class lexeme_table {
+public:
+	// Frees every lexeme created by install_lexeme().
+	~lexeme_table() {
+		for (std::map<long, lexeme_t*>::iterator iter = lexeme_table.begin(); iter != lexeme_table.end(); ++iter) {
+			delete iter->second;
+		}
+	}
+	// Returns the lexeme stored under `numeric`, creating it on first use.
+	// `name` and `source` are stored as given (not copied) and are ignored
+	// when the entry already exists.
+	lexeme_t* install_lexeme(const char* name, const char* source, long numeric) {
+		std::map<long, lexeme_t*>::iterator iter = lexeme_table.find(numeric);
+		if (iter != lexeme_table.end()) {
+			return iter->second;
+		}
+		// lexeme_t is a plain struct without constructors, so it is
+		// value-initialised and filled field by field.
+		lexeme_t* lexeme = new lexeme_t();
+		lexeme->name    = name;
+		lexeme->source  = source;
+		lexeme->numeric = numeric;
+		lexeme->ord     = 0;
+		lexeme_table[numeric] = lexeme;
+		return lexeme;
+	}
+	// Assigns a dense rank (`ord`, starting at 1) to every lexeme following
+	// the ascending order of `numeric`; equal values share a rank.
+	void reorder() {
+		std::vector<lexeme_t*> tmp;
+		for (std::map<long, lexeme_t*>::iterator iter = lexeme_table.begin(); iter != lexeme_table.end(); ++iter) {
+			tmp.push_back(iter->second);
+		}
+		std::sort(tmp.begin(), tmp.end(), reorder_function);
+		int  ord  = 0;
+		long prev = 0;
+		for (std::vector<lexeme_t*>::iterator iter = tmp.begin(); iter != tmp.end(); ++iter) {
+			// The first element always opens a rank, so no sentinel value
+			// of `prev` can collide with a real numeric (e.g. -1).
+			if (iter == tmp.begin() || prev != (*iter)->numeric) {
+				ord += 1;
+				prev = (*iter)->numeric;
+			}
+			(*iter)->ord = ord;
+		}
+	}
+private:
+	std::map<long, lexeme_t*> lexeme_table;
+	// Strict weak ordering by numeric value, used by std::sort in reorder().
+	static bool reorder_function(lexeme_t* a, lexeme_t* b) {
+		return a->numeric < b->numeric;
+	}
+};
+#endif

data/lib/trace_visualization/preprocessor/parser_functions.c ADDED Viewed

@@ -0,0 +1,42 @@
+#include <stdio.h>
+#include <time.h>
+/**
+ * When flex finds a match, yytext points to the first character of the match
+ * in the input buffer. The string itself is part of the input buffer, and is
+ * NOT allocated separately
+ */
+extern const char* yytext;
+/**
+ * Implemented in two places: by the (f)lex scanner and by the unit tests
+ */
+extern void output_lexeme(const char* name, const char* source, int value);
+//-----------------------------------------------------------------------------
+/*
+ * Parses the current token (yytext) as "[<number>]" and emits it as an
+ * "ID" lexeme. Nothing is emitted when the text does not have that form,
+ * so an indeterminate value is never passed on.
+ */
+void parse_identifier() {
+	int id = 0;
+	if (sscanf(yytext, "[%d]", &id) != 1) {
+		return;
+	}
+	output_lexeme("ID", yytext, id);
+}
+//-----------------------------------------------------------------------------
+/*
+ * Parses the current token (yytext) as a dotted-quad IPv4 address, packs the
+ * four octets into one 32-bit value (first octet in the top byte) and emits
+ * it as an "IP" lexeme. Nothing is emitted unless all four fields parse.
+ */
+void parse_ip() {
+	unsigned ips[4] = {0, 0, 0, 0};
+	/* %u matches the unsigned destinations (%d expects int*). */
+	if (sscanf(yytext, "%u.%u.%u.%u", ips, ips + 1, ips + 2, ips + 3) != 4) {
+		return;
+	}
+	unsigned result = (((ips[0] << 24) & 0xFF000000) | ((ips[1] << 16) & 0xFF0000) | ((ips[2] << 8) & 0xFF00) | (ips[3] & 0xFF));
+	/* output_lexeme() takes an int, so addresses with the top bit set are
+	 * passed as the converted (typically negative) value, as before. */
+	output_lexeme("IP", yytext, (int)result);
+}
+//-----------------------------------------------------------------------------
+/*
+ * Parses the current token (yytext) as "[DD Mon YYYY HH:MM:SS]", converts it
+ * to a time_t with mktime() (local time) and emits it as a "TIME" lexeme.
+ * Nothing is emitted when the text does not match the format.
+ */
+void parse_date() {
+	/* strptime() only fills the fields named in the format, so start from
+	 * an all-zero struct instead of stack garbage. */
+	struct tm tm = {0};
+	time_t t;
+	if (strptime(yytext, "[%d %b %Y %H:%M:%S]", &tm) == NULL) {
+		return;
+	}
+	/* Let mktime() decide whether daylight saving time applies. */
+	tm.tm_isdst = -1;
+	t = mktime(&tm);
+	/* output_lexeme() takes an int; the value is narrowed exactly as the
+	 * implicit conversion did before. */
+	output_lexeme("TIME", yytext, (int)t);
+}

data/lib/trace_visualization/preprocessor/parser_test.cpp ADDED Viewed

@@ -0,0 +1,71 @@
+#include <cppunit/extensions/HelperMacros.h>
+#include <iostream>
+#include "lexeme.h"
+#include "lexeme_table.h"
+const char* yytext;
+void output_lexeme(const char* name, long value);
+extern void parse_identifier();
+extern void parse_ip();
+extern void parse_date();
+using namespace std;
+// CppUnit fixture for the parser helpers and the lexeme table.
+// Registers two cases: identifier_parser_test and lexeme_table_test.
+class parser_test : public CPPUNIT_NS::TestFixture {
+	CPPUNIT_TEST_SUITE(parser_test);
+	CPPUNIT_TEST(identifier_parser_test);
+	CPPUNIT_TEST(lexeme_table_test);
+	CPPUNIT_TEST_SUITE_END();
+public:
+	void identifier_parser_test(void);
+	void lexeme_table_test(void);
+};
+CPPUNIT_TEST_SUITE_REGISTRATION(parser_test);
+lexeme_t lexeme;
+// lexeme.name = "";
+// lexeme.source = "";
+// lexeme.numeric = -1;
+// lexeme.ord = -1;
+// Test double for the scanner's output_lexeme(): records the most recently
+// emitted lexeme in the global `lexeme` so the tests can inspect it.
+// The parameter list matches the three-argument function the parse_*()
+// helpers call (name, source text, int value).
+void output_lexeme(const char* name, const char* source, int value) {
+	lexeme.name    = name;
+	lexeme.source  = source;
+	lexeme.numeric = value;
+}
+//-----------------------------------------------------------------------------
+// Feeds "[123]" to parse_identifier() through the global yytext and checks
+// that the lexeme recorded by output_lexeme() is named "ID".
+void parser_test::identifier_parser_test(void) {
+	yytext = "[123]";
+	parse_identifier();
+	CPPUNIT_ASSERT(strcmp("ID", lexeme.name) == 0);
+}
+//-----------------------------------------------------------------------------
+// Installs the values 123, 124 and 123 again, checks that the repeated value
+// returns the already stored lexeme, and that reorder_lexemes() ranks
+// 123 as 1 and 124 as 2.
+// NOTE(review): the table and its lexemes are never released in this test.
+void parser_test::lexeme_table_test(void) {
+	hashmap_t* table = lexeme_table_new();
+	// The local `lexeme` shadows the file-level `lexeme` object in this scope.
+	lexeme_t* lexeme         = install_lexeme(table, "id", "[123]", 123);
+	lexeme_t* another_lexeme = install_lexeme(table, "id", "[124]", 124);
+	lexeme_t* repeat_lexeme  = install_lexeme(table, "id", "[123]", 123);
+	CPPUNIT_ASSERT(lexeme         != NULL);
+	CPPUNIT_ASSERT(another_lexeme != NULL);
+	CPPUNIT_ASSERT(repeat_lexeme  != NULL);
+	CPPUNIT_ASSERT_EQUAL(lexeme, repeat_lexeme);
+	reorder_lexemes(table);
+	CPPUNIT_ASSERT_EQUAL(1, lexeme->ord);
+	CPPUNIT_ASSERT_EQUAL(2, another_lexeme->ord);
+}

data/lib/trace_visualization/preprocessor/preprocessor.l ADDED Viewed

@@ -0,0 +1,18 @@
+%{
+extern void parse_identifier();
+extern void parse_ip();
+extern void parse_date();
+/* Scanner-side output_lexeme(): writes the lexeme to stdout as
+ * "{LEXEME;<name>;<source>;<value>}" with no trailing newline. */
+void output_lexeme(const char* name, const char* source, int value) {
+	printf("{LEXEME;%s;%s;%d}", name, source, value);
+}
+%}
+%option nounput
+%%
+\[[0-9]{3,}\]	parse_identifier();
+(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?) parse_ip();
+\[[0-9]{2}\ [a-zA-Z]{3}\ [0-9]{4}\ [0-9]{2}\:[0-9]{2}\:[0-9]{2}\]	parse_date();
+%%

data/lib/trace_visualization/preprocessor/test_main.cpp ADDED Viewed

@@ -0,0 +1,39 @@
+/**
+ * @file test_main.cpp
+ * @brief the main module for test batch using CPPUNIT unit test framework.
+ */
+#include <iostream>
+#include <cppunit/BriefTestProgressListener.h>
+#include <cppunit/CompilerOutputter.h>
+#include <cppunit/extensions/TestFactoryRegistry.h>
+#include <cppunit/TestResult.h>
+#include <cppunit/TestResultCollector.h>
+#include <cppunit/TestRunner.h>
+/**
+ * The main function of the unit test batch runner.
+ * Runs every registered CppUnit suite and reports the results on stderr.
+ * @param argc ignored.
+ * @param argv ignored.
+ * @return 0 when all tests passed, 1 otherwise.
+ */
+int main(int argc, char* argv[]) {
+  // Create the event manager and test controller
+  CPPUNIT_NS::TestResult controller;
+  // Add a listener that collects test results
+  CPPUNIT_NS::TestResultCollector result;
+  controller.addListener( &result );
+  // Add a listener that prints progress as the tests run.
+  CPPUNIT_NS::BriefTestProgressListener progress;
+  controller.addListener( &progress );
+  // Add the top suite to the test runner
+  CPPUNIT_NS::TestRunner runner;
+  runner.addTest( CPPUNIT_NS::TestFactoryRegistry::getRegistry().makeTest() );
+  runner.run( controller );
+  // Print the test results in a compiler-compatible format.
+  CPPUNIT_NS::CompilerOutputter outputter( &result, std::cerr );
+  outputter.write();
+  return result.wasSuccessful() ? 0 : 1;
+}

data/lib/trace_visualization/profile.rb ADDED Viewed

@@ -0,0 +1,38 @@
+module TraceVisualization
+  module Profile
+    def self.time(name)
+      start = Time.now
+      puts "#{start} Start #{name}"
+      yield
+      finish = Time.now
+      puts "#{finish} Finish #{name}"
+      (finish.to_f - start.to_f).round(3)
+    end
+    def self.processing_time(message, logger = nil, object = nil, method = nil)
+      start = Time.now
+      yield
+      finish = Time.now
+      puts "#{message}#{object != nil ? (object.send(method)) : ''}, pt = #{(finish.to_f - start.to_f).round(4)} sec"
+    end
+    def self.pt
+      start = Time.now
+      yield
+      finish = Time.now
+      (finish.to_f - start.to_f).round(4)
+    end
+  end
+end

data/lib/trace_visualization/reorder.rb CHANGED Viewed

@@ -1,25 +1,36 @@
 module TraceVisualization
   module Reorder
-    # Assign new values (ord field) in order to reduce the distance between min
-    # and max values. It's necessary to reduce the size of the alphabet.
+    # Assign new int_values (ord field) in order to reduce the distance between
+    # min and max int_values. It's necessary to reduce the size of the alphabet.
+    # Return max int_value
     def self.process(data)
-      sorted = data.sort do |a, b|
-        c = a.value - b.value
-        c == 0 ? 0 : (c < 0 ? -1 : 1)
-      end
+      sorted = data.sort { |a, b| a.int_value <=> b.int_value }
-      idx = 0
-      prev = nil
+      termination_chars = []
+      idx, prev = 0, nil
       sorted.each do |item|
-        if prev != item.value
-          prev = item.value
+        if prev != item.int_value
+          prev = item.int_value
           idx += 1
         end
-        item.ord = idx
+        if item.int_value == TraceVisualization::TERMINATION_CHAR.ord
+          termination_chars << item
+          idx -= 1
+        else
+          item.ord = idx
+        end
       end
+      if termination_chars.size > 0
+        # Set maximal value for termination char
+        termination_chars.each { |x| x.ord = idx + 1 }
+        idx += 1
+      end
+      idx
     end
   end
 end

data/lib/trace_visualization/repetitions_psy.rb CHANGED Viewed

@@ -10,7 +10,7 @@ module TraceVisualization
       lcp = TraceVisualization::LongestCommonPrefix.effective(str, sa, str.size)
       bwt = TraceVisualization::BurrowsWheelerTransform.bwt(str, sa, str.length)
-      result = psy1_original(lcp, bwt, 3, str.length)
+      result = psy1_original(lcp, bwt, p_min, str.length)
       result = decode_psy1_result(result, sa) if decode_result
       result

data/lib/trace_visualization/suffix_array.rb CHANGED Viewed

@@ -27,6 +27,8 @@ module TraceVisualization
         str.each_char { |c| s << c.ord }
       elsif str.instance_of? Array
         str.each { |c| s << c.ord }
+      else
+        s = str
       end
       3.times { s << 0 }
@@ -35,6 +37,8 @@ module TraceVisualization
       effective_linear(s, suffix_array, n, s.max + 1)
+      3.times { s.pop }
       suffix_array[0 ... -3]
     end

data/lib/trace_visualization/utils.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+require 'trace_visualization/assert'
 module TraceVisualization
   module Utils
@@ -5,8 +7,11 @@ module TraceVisualization
       lp.hash + rp.hash
     end
     # Get the start position of lines
     def self.lines_pos(str)
+      TraceVisualization.assert_instance_of(str, String)
       lines_pos = [0]
       pos = -1
@@ -43,5 +48,38 @@ module TraceVisualization
       rs_by_line
     end
+    # Read data from file
+    # Allowed options
+    #   file_name file name
+    #   n_bytes the number of bytes
+    #   n_lines the number of lines
+    #
+    # If both options - n_bytes and n_lines - are set, it uses n_bytes
+    def self.read_file(options)
+      str = nil
+      if options[:file_name]
+        str = ""
+        if options[:n_bytes] != nil
+          str = IO.read(options[:file_name], options[:n_bytes])
+        else
+          fd = open(options[:file_name])
+          limit = options[:n_lines] || 2**32
+          begin
+            while (line = fd.readline)
+              str += line
+              limit -= 1
+              break if limit == 0
+            end
+          rescue EOFError => e
+          end
+        end
+      end
+      str
+    end
   end # module Utils
 end # module TraceVisualization

data/lib/trace_visualization/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module TraceVisualization
-  VERSION = "0.0.1"
+  VERSION = '0.0.2'
 end

data/lib/trace_visualization/visualization/console_color_print.rb CHANGED Viewed

@@ -1,6 +1,6 @@
-require 'travis/data/repetition'
+require 'trace_visualization/data/repetition'
-module Travis
+module TraceVisualization
   module Visualization
     module ConsoleColorPrint
@@ -9,6 +9,8 @@ module Travis
       FNSH = "\033[0m"
       def self.hl(str, repetition)
+        raise "repetition must be instance of TraceVisualization::Data::Repetition" if not repetition.instance_of? TraceVisualization::Data::Repetition
         result = ""
         prev_position = 0
         positions = repetition.build_positions

data/lib/trace_visualization.rb CHANGED Viewed

@@ -1,10 +1,74 @@
-require "trace_visualization/version"
+%w(
+  mapping
+  utils
+  version
+).each { |file| require File.join(File.dirname(__FILE__), 'trace_visualization', file) }
+require 'logger'
+require 'benchmark'
 module TraceVisualization
   # Should be 'greater' of all possible chars in the lexicographical order
-  TERMINATION_CHAR = '$'
+  TERMINATION_CHAR = 255.chr
   FORBIDDEN_CHARS = /\n/
+  # Entry point of the processing pipeline.
+  #
+  # options[:file_name] - passed to from_preprocessed_file inside the
+  #                       mapping.process block
+  # options[:str]       - only referenced by the disabled code below
+  #
+  # Defaults (:p_min, :logger) are filled in by set_default_options.
+  # The result of mapping.process is the value of the method; everything
+  # between =begin and =end is commented out.
+  #
+  def self.process(options = {})
+    options = set_default_options(options)
+    logger  = options[:logger]
+    # Preprocess
+    file_name = options[:file_name]
+    # Read & mapping file
+    mapping = TraceVisualization::Mapping.new
+    mapping.process do
+      from_preprocessed_file options[:file_name]
+    end
+=begin
+    logger.info 'start process'
+    str        = nil
+    str_mapped = nil
+    Benchmark.bm(14) do |x|
+      x.report('read file') { str = options[:str] || TraceVisualization::Utils.read_file(options) }
+      x.report('mapping') { str_mapped = TraceVisualization::Mapping.new(str) }
+    end
+    str_len = str.length
+    map_len = str_mapped.length
+    logger.info("str.length = #{str_len}, str_mapped.length = #{map_len}, compression = #{((str_len.to_f - map_len) / str_len.to_f).round(2)}%")
+    return []
+    rs = TraceVisualization::Repetitions.psy1(str_mapped, options[:p_min], true)
+    logger.info 'PSY1 finish. build context'
+    context = TraceVisualization::Repetitions::Context.new(str_mapped, rs)
+    logger.info 'first concat step'
+    TraceVisualization::RepetitionsConcatenation.process(rs, 1, context)
+    # Approximate
+    # Vissss
+=end
+    #rs
+  end
+  def self.set_default_options(options)
+    options = {
+      :p_min => 3,
+      :logger => Logger.new(STDOUT)
+    }.merge options
+  end
 end