trace_visualization 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +2 -1
- data/Rakefile +11 -1
- data/lib/trace_visualization/assert.rb +5 -0
- data/lib/trace_visualization/data/lexeme.rb +59 -0
- data/lib/trace_visualization/lexeme_overlap_filter.rb +64 -0
- data/lib/trace_visualization/mapping.rb +219 -56
- data/lib/trace_visualization/preprocessor/Makefile +43 -0
- data/lib/trace_visualization/preprocessor/hashmap.c +93 -0
- data/lib/trace_visualization/preprocessor/hashmap.h +27 -0
- data/lib/trace_visualization/preprocessor/hashmap_test.cpp +90 -0
- data/lib/trace_visualization/preprocessor/lexeme.h +11 -0
- data/lib/trace_visualization/preprocessor/lexeme_table.c +50 -0
- data/lib/trace_visualization/preprocessor/lexeme_table.h +13 -0
- data/lib/trace_visualization/preprocessor/lexeme_table_cpp.h +61 -0
- data/lib/trace_visualization/preprocessor/parser_functions.c +42 -0
- data/lib/trace_visualization/preprocessor/parser_test.cpp +71 -0
- data/lib/trace_visualization/preprocessor/preprocessor.l +18 -0
- data/lib/trace_visualization/preprocessor/test_main.cpp +39 -0
- data/lib/trace_visualization/profile.rb +38 -0
- data/lib/trace_visualization/reorder.rb +22 -11
- data/lib/trace_visualization/repetitions_psy.rb +1 -1
- data/lib/trace_visualization/suffix_array.rb +4 -0
- data/lib/trace_visualization/utils.rb +38 -0
- data/lib/trace_visualization/version.rb +1 -1
- data/lib/trace_visualization/visualization/console_color_print.rb +4 -2
- data/lib/trace_visualization.rb +68 -4
- data/spec/bwt_spec.rb +29 -7
- data/spec/lexeme_overlap_filter_spec.rb +59 -0
- data/spec/longest_common_prefix_spec.rb +3 -3
- data/spec/mapping_spec.rb +80 -34
- data/spec/reorder_spec.rb +25 -7
- data/spec/repetitions_psy_spec.rb +5 -5
- data/spec/suffix_array_spec.rb +30 -8
- data/spec/utils_spec.rb +30 -0
- metadata +22 -3
- data/LICENSE.txt +0 -22
@@ -0,0 +1,90 @@
|
|
1
|
+
#include <cppunit/extensions/HelperMacros.h>
|
2
|
+
#include <iostream>
|
3
|
+
#include "hashmap.h"
|
4
|
+
|
5
|
+
using namespace std;
|
6
|
+
|
7
|
+
class hashmap_test : public CPPUNIT_NS::TestFixture {
|
8
|
+
CPPUNIT_TEST_SUITE(hashmap_test);
|
9
|
+
CPPUNIT_TEST(smoke_test);
|
10
|
+
CPPUNIT_TEST(complex_test);
|
11
|
+
CPPUNIT_TEST(hashmap_values_test);
|
12
|
+
CPPUNIT_TEST_SUITE_END();
|
13
|
+
public:
|
14
|
+
void smoke_test(void);
|
15
|
+
void complex_test(void);
|
16
|
+
void hashmap_values_test(void);
|
17
|
+
};
|
18
|
+
|
19
|
+
CPPUNIT_TEST_SUITE_REGISTRATION(hashmap_test);
|
20
|
+
|
21
|
+
void hashmap_test::smoke_test() {
|
22
|
+
hashmap_t* map = hashmap_new();
|
23
|
+
|
24
|
+
const char* value = "value";
|
25
|
+
hashmap_put(map, 12345, (char*)value);
|
26
|
+
|
27
|
+
void* ptr = hashmap_get(map, 1);
|
28
|
+
CPPUNIT_ASSERT(ptr == NULL);
|
29
|
+
|
30
|
+
ptr = hashmap_get(map, 12345);
|
31
|
+
CPPUNIT_ASSERT(ptr != NULL);
|
32
|
+
CPPUNIT_ASSERT(strcmp((char*)ptr, value) == 0);
|
33
|
+
|
34
|
+
hashmap_free(map);
|
35
|
+
}
|
36
|
+
|
37
|
+
void hashmap_test::complex_test() {
|
38
|
+
hashmap_t* map = hashmap_new();
|
39
|
+
|
40
|
+
CPPUNIT_ASSERT(map != NULL);
|
41
|
+
|
42
|
+
for (long i = 0; i < HASHMAP_SIZE << 3; i += HASHMAP_SIZE >> 2) {
|
43
|
+
hashmap_put(map, i, (void*)i);
|
44
|
+
}
|
45
|
+
|
46
|
+
for (int i = 0; i < HASHMAP_SIZE; i++) {
|
47
|
+
if (i % (HASHMAP_SIZE >> 2) == 0) {
|
48
|
+
CPPUNIT_ASSERT(map->table[i] != NULL);
|
49
|
+
} else {
|
50
|
+
CPPUNIT_ASSERT(map->table[i] == NULL);
|
51
|
+
}
|
52
|
+
}
|
53
|
+
|
54
|
+
for (long i = 0; i < HASHMAP_SIZE << 3; i += HASHMAP_SIZE >> 2) {
|
55
|
+
void* ptr = hashmap_get(map, i);
|
56
|
+
CPPUNIT_ASSERT(ptr == (void*)i);
|
57
|
+
|
58
|
+
CPPUNIT_ASSERT(hashmap_get(map, i + 1) == NULL);
|
59
|
+
CPPUNIT_ASSERT(hashmap_get(map, i - 1) == NULL);
|
60
|
+
}
|
61
|
+
|
62
|
+
hashmap_free(map);
|
63
|
+
}
|
64
|
+
|
65
|
+
void hashmap_test::hashmap_values_test() {
|
66
|
+
hashmap_t* map = hashmap_new();
|
67
|
+
|
68
|
+
CPPUNIT_ASSERT(map != NULL);
|
69
|
+
|
70
|
+
int k = 2;
|
71
|
+
|
72
|
+
for (long i = 0; i < HASHMAP_SIZE * k; i += 1) {
|
73
|
+
hashmap_put(map, i, (void*)i);
|
74
|
+
}
|
75
|
+
|
76
|
+
CPPUNIT_ASSERT(map->size == HASHMAP_SIZE * k);
|
77
|
+
|
78
|
+
void** values = hashmap_values(map);
|
79
|
+
CPPUNIT_ASSERT(values != NULL);
|
80
|
+
|
81
|
+
for (int i = 0; i < map->size; i++) {
|
82
|
+
void* value = values[i];
|
83
|
+
|
84
|
+
int int_value = (i % k) * HASHMAP_SIZE + i / k;
|
85
|
+
CPPUNIT_ASSERT(value == (void*)int_value);
|
86
|
+
}
|
87
|
+
|
88
|
+
hashmap_values_free(values);
|
89
|
+
hashmap_free(map);
|
90
|
+
}
|
@@ -0,0 +1,50 @@
|
|
1
|
+
#include "lexeme_table.h"
|
2
|
+
#include <stdlib.h>
|
3
|
+
|
4
|
+
/* Creates an empty lexeme table; the table is a plain hashmap. */
hashmap_t* lexeme_table_new() {
    hashmap_t* table = hashmap_new();
    return table;
}
|
7
|
+
|
8
|
+
/*
 * Returns the lexeme registered under `numeric`, creating it on first use.
 *
 * lexeme_table  table to search and, if needed, extend
 * name, source  stored by pointer (NOT copied); only used when a new lexeme
 *               is created, so they must outlive the table entry
 * numeric       lookup key and numeric value of the lexeme
 *
 * Returns the existing or newly created lexeme, or NULL when memory for a
 * new lexeme cannot be allocated. A new lexeme starts with ord == 0 until
 * reorder_lexemes() assigns the real value.
 */
lexeme_t* install_lexeme(hashmap_t* lexeme_table, const char* name, const char* source, long numeric) {
    void* existing = hashmap_get(lexeme_table, numeric);
    if (existing != NULL) {
        return (lexeme_t*)existing;
    }

    lexeme_t* lexeme = (lexeme_t*)malloc(sizeof(lexeme_t));
    if (lexeme == NULL) {
        return NULL;
    }

    lexeme->name = name;
    lexeme->source = source;
    lexeme->numeric = numeric;
    lexeme->ord = 0; /* defined value instead of uninitialized memory */

    hashmap_put(lexeme_table, numeric, lexeme);

    return lexeme;
}
|
27
|
+
|
28
|
+
int compare_lexeme(const void* a, const void* b) {
|
29
|
+
return ((lexeme_t*)*(void**)a)->numeric - ((lexeme_t*)*(void**)b)->numeric;
|
30
|
+
}
|
31
|
+
|
32
|
+
/*
 * Assigns dense ranks to all lexemes of the table: after the call every
 * lexeme's `ord` is the 1-based position of its `numeric` among the distinct
 * numeric values, in ascending order (equal numerics share one ord).
 *
 * Does nothing when the table's value array cannot be obtained.
 */
void reorder_lexemes(hashmap_t* lexeme_table) {
    void** values = hashmap_values(lexeme_table);
    if (values == NULL) {
        return;
    }

    qsort(values, lexeme_table->size, sizeof(void*), compare_lexeme);

    int ord = 0;
    long prev = 0;

    for (int i = 0; i < lexeme_table->size; i++) {
        lexeme_t* lexeme = (lexeme_t*)values[i];

        /* The first element always opens a new rank; no sentinel value is
         * used, so a lexeme whose numeric is -1 is ranked like any other. */
        if (i == 0 || lexeme->numeric != prev) {
            ord += 1;
            prev = lexeme->numeric;
        }

        lexeme->ord = ord;
    }

    /* Releases only the temporary array, not the lexemes it points to. */
    hashmap_values_free(values);
}
|
@@ -0,0 +1,13 @@
|
|
1
|
+
/*
 * C interface of the lexeme table (a hashmap keyed by the lexeme's numeric
 * value).
 *
 * The guard name avoids the reserved double-underscore form and no longer
 * coincides with the guard of lexeme_table_cpp.h, so both headers can be
 * included in one translation unit.
 */
#ifndef LEXEME_TABLE_H
#define LEXEME_TABLE_H

#include "lexeme.h"
#include "hashmap.h"

/* Creates an empty lexeme table. */
extern hashmap_t* lexeme_table_new(void);

/* Returns the lexeme stored under `numeric`, creating it if absent. */
extern lexeme_t* install_lexeme(hashmap_t* lexeme_table, const char* name, const char* source, long numeric);

/* Recomputes the `ord` field of every lexeme in the table. */
extern void reorder_lexemes(hashmap_t* lexeme_table);

#endif /* LEXEME_TABLE_H */
|
@@ -0,0 +1,61 @@
|
|
1
|
+
// Guard differs from the one in lexeme_table.h so that both headers can be
// included together, and avoids the reserved double-underscore form.
#ifndef LEXEME_TABLE_CPP_H
#define LEXEME_TABLE_CPP_H

#include <algorithm>
#include <cstddef>
#include <map>
#include <vector>
#include "lexeme.h"

/**
 * Owning table of lexemes keyed by their numeric value.
 *
 * Every lexeme created through install_lexeme() is deleted by the
 * destructor. Copying is disabled because a copy would delete the same
 * lexemes a second time.
 */
class lexeme_table {
    typedef std::map<long, lexeme_t*> table_type;

public:
    lexeme_table() {}

    ~lexeme_table() {
        for (table_type::iterator it = table_.begin(); it != table_.end(); ++it) {
            delete it->second;
        }
    }

    /**
     * Returns the lexeme registered under `numeric`, creating it from
     * (name, source, numeric) when it does not exist yet. The returned
     * pointer stays owned by the table.
     */
    lexeme_t* install_lexeme(const char* name, const char* source, long numeric) {
        table_type::iterator it = table_.find(numeric);
        if (it != table_.end()) {
            return it->second;
        }

        lexeme_t* lexeme = new lexeme_t(name, source, numeric);
        table_[numeric] = lexeme;
        return lexeme;
    }

    /**
     * Assigns to every lexeme's `ord` the 1-based rank of its `numeric`
     * among the distinct numeric values, in ascending order.
     */
    void reorder() {
        std::vector<lexeme_t*> sorted;
        sorted.reserve(table_.size());
        for (table_type::iterator it = table_.begin(); it != table_.end(); ++it) {
            sorted.push_back(it->second);
        }

        std::sort(sorted.begin(), sorted.end(), reorder_function);

        int ord = 0;
        for (std::vector<lexeme_t*>::iterator it = sorted.begin(); it != sorted.end(); ++it) {
            // Compare with the previous element instead of a -1 sentinel, so
            // a lexeme whose numeric is -1 still receives rank 1.
            if (it == sorted.begin() || (*(it - 1))->numeric != (*it)->numeric) {
                ord += 1;
            }

            (*it)->ord = ord;
        }
    }

private:
    // Declared but not defined: the table is not copyable.
    lexeme_table(const lexeme_table&);
    lexeme_table& operator=(const lexeme_table&);

    table_type table_;

    static bool reorder_function(lexeme_t* a, lexeme_t* b) {
        return a->numeric < b->numeric;
    }
};

#endif // LEXEME_TABLE_CPP_H
|
@@ -0,0 +1,42 @@
|
|
1
|
+
#include <stdio.h>
|
2
|
+
#include <time.h>
|
3
|
+
|
4
|
+
/**
|
5
|
+
* When flex finds a match, yytext points to the first character of the match
|
6
|
+
* in the input buffer. The string itself is part of the input buffer, and is
|
7
|
+
* NOT allocated separately
|
8
|
+
*/
|
9
|
+
extern const char* yytext;
|
10
|
+
|
11
|
+
/**
|
12
|
+
 * Implemented in two places: by the (f)lex scanner and by the unit tests
|
13
|
+
*/
|
14
|
+
extern void output_lexeme(const char* name, const char* source, int value);
|
15
|
+
|
16
|
+
//-----------------------------------------------------------------------------
|
17
|
+
void parse_identifier() {
|
18
|
+
int id;
|
19
|
+
sscanf(yytext, "[%d]", &id);
|
20
|
+
output_lexeme("ID", yytext, id);
|
21
|
+
}
|
22
|
+
|
23
|
+
//-----------------------------------------------------------------------------
|
24
|
+
void parse_ip() {
|
25
|
+
unsigned ips[4];
|
26
|
+
sscanf(yytext, "%d.%d.%d.%d", ips, ips + 1, ips + 2, ips + 3);
|
27
|
+
unsigned result = (((ips[0] << 24) & 0xFF000000) | ((ips[1] << 16) & 0xFF0000) | ((ips[2] << 8) & 0xFF00) | (ips[3] & 0xFF));
|
28
|
+
output_lexeme("IP", yytext, result);
|
29
|
+
}
|
30
|
+
|
31
|
+
//-----------------------------------------------------------------------------
|
32
|
+
void parse_date() {
|
33
|
+
struct tm tm;
|
34
|
+
time_t t;
|
35
|
+
|
36
|
+
if (strptime(yytext, "[%d %b %Y %H:%M:%S]", &tm) == NULL)
|
37
|
+
/* handle error */ ;
|
38
|
+
|
39
|
+
t = mktime(&tm);
|
40
|
+
|
41
|
+
output_lexeme("TIME", yytext, t);
|
42
|
+
}
|
@@ -0,0 +1,71 @@
|
|
1
|
+
#include <cppunit/extensions/HelperMacros.h>
|
2
|
+
#include <iostream>
|
3
|
+
#include "lexeme.h"
|
4
|
+
#include "lexeme_table.h"
|
5
|
+
|
6
|
+
const char* yytext;
|
7
|
+
|
8
|
+
void output_lexeme(const char* name, long value);
|
9
|
+
|
10
|
+
extern void parse_identifier();
|
11
|
+
extern void parse_ip();
|
12
|
+
extern void parse_date();
|
13
|
+
|
14
|
+
using namespace std;
|
15
|
+
|
16
|
+
class parser_test : public CPPUNIT_NS::TestFixture {
|
17
|
+
CPPUNIT_TEST_SUITE(parser_test);
|
18
|
+
CPPUNIT_TEST(identifier_parser_test);
|
19
|
+
CPPUNIT_TEST(lexeme_table_test);
|
20
|
+
CPPUNIT_TEST_SUITE_END();
|
21
|
+
public:
|
22
|
+
void identifier_parser_test(void);
|
23
|
+
void lexeme_table_test(void);
|
24
|
+
};
|
25
|
+
|
26
|
+
CPPUNIT_TEST_SUITE_REGISTRATION(parser_test);
|
27
|
+
|
28
|
+
lexeme_t lexeme;
|
29
|
+
// lexeme.name = "";
|
30
|
+
// lexeme.source = "";
|
31
|
+
// lexeme.numeric = -1;
|
32
|
+
// lexeme.ord = -1;
|
33
|
+
|
34
|
+
/**
 * Test double for the scanner's output hook: records the reported lexeme in
 * the file-level `lexeme` object so that tests can inspect it.
 *
 * The parameter list matches the declaration used by the parser functions
 * (name, source, value); the former two-argument form could not satisfy
 * their three-argument calls.
 * NOTE(review): assumes the parser functions are built with the same
 * language linkage as this file - confirm against the Makefile.
 */
void output_lexeme(const char* name, const char* source, int value) {
    lexeme.name = name;
    lexeme.source = source;
    lexeme.numeric = value;
}
|
38
|
+
|
39
|
+
//-----------------------------------------------------------------------------
|
40
|
+
/// Feeds "[123]" to parse_identifier() and checks both the reported lexeme
/// name and the parsed numeric value.
void parser_test::identifier_parser_test(void) {
    yytext = "[123]";

    parse_identifier();

    // Guard first: strcmp() must not be handed a null pointer.
    CPPUNIT_ASSERT(lexeme.name != NULL);
    CPPUNIT_ASSERT(strcmp("ID", lexeme.name) == 0);
    CPPUNIT_ASSERT(lexeme.numeric == 123);
}
|
47
|
+
|
48
|
+
|
49
|
+
//-----------------------------------------------------------------------------
|
50
|
+
void parser_test::lexeme_table_test(void) {
|
51
|
+
hashmap_t* table = lexeme_table_new();
|
52
|
+
|
53
|
+
lexeme_t* lexeme = install_lexeme(table, "id", "[123]", 123);
|
54
|
+
lexeme_t* another_lexeme = install_lexeme(table, "id", "[124]", 124);
|
55
|
+
lexeme_t* repeat_lexeme = install_lexeme(table, "id", "[123]", 123);
|
56
|
+
|
57
|
+
CPPUNIT_ASSERT(lexeme != NULL);
|
58
|
+
CPPUNIT_ASSERT(another_lexeme != NULL);
|
59
|
+
CPPUNIT_ASSERT(repeat_lexeme != NULL);
|
60
|
+
|
61
|
+
CPPUNIT_ASSERT_EQUAL(lexeme, repeat_lexeme);
|
62
|
+
|
63
|
+
reorder_lexemes(table);
|
64
|
+
|
65
|
+
CPPUNIT_ASSERT_EQUAL(1, lexeme->ord);
|
66
|
+
CPPUNIT_ASSERT_EQUAL(2, another_lexeme->ord);
|
67
|
+
}
|
68
|
+
|
69
|
+
|
70
|
+
|
71
|
+
|
@@ -0,0 +1,18 @@
|
|
1
|
+
%{
/* Handlers for the recognized token kinds; each one reads yytext. */
extern void parse_identifier();
extern void parse_ip();
extern void parse_date();

/* Writes one recognized lexeme to standard output as {LEXEME;name;source;value}. */
void output_lexeme(const char* name, const char* source, int value) {
  printf("{LEXEME;%s;%s;%d}", name, source, value);
}

%}

%option nounput

  /* One decimal octet of an IPv4 address: 0-255. */
OCTET (25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)

%%
\[[0-9]{3,}\] parse_identifier();
{OCTET}\.{OCTET}\.{OCTET}\.{OCTET} parse_ip();
\[[0-9]{2}\ [a-zA-Z]{3}\ [0-9]{4}\ [0-9]{2}\:[0-9]{2}\:[0-9]{2}\] parse_date();
%%
|
@@ -0,0 +1,39 @@
|
|
1
|
+
/**
|
2
|
+
* @file test_main.cpp
|
3
|
+
* @brief the main module for test batch using CPPUNIT unit test framework.
|
4
|
+
*/
|
5
|
+
#include <cppunit/BriefTestProgressListener.h>
|
6
|
+
#include <cppunit/CompilerOutputter.h>
|
7
|
+
#include <cppunit/extensions/TestFactoryRegistry.h>
|
8
|
+
#include <cppunit/TestResult.h>
|
9
|
+
#include <cppunit/TestResultCollector.h>
|
10
|
+
#include <cppunit/TestRunner.h>
|
11
|
+
|
12
|
+
/**
|
13
|
+
* The main funnction of unit test batch runner.
|
14
|
+
* @param argc ignored.
|
15
|
+
* @param argv ignored.
|
16
|
+
*/
|
17
|
+
int main(int argc, char* argv[]) {
|
18
|
+
// Create the event manager and test controller
|
19
|
+
CPPUNIT_NS::TestResult controller;
|
20
|
+
|
21
|
+
// Add a listener that colllects test result
|
22
|
+
CPPUNIT_NS::TestResultCollector result;
|
23
|
+
controller.addListener( &result );
|
24
|
+
|
25
|
+
// Add a listener that print dots as test run.
|
26
|
+
CPPUNIT_NS::BriefTestProgressListener progress;
|
27
|
+
controller.addListener( &progress );
|
28
|
+
|
29
|
+
// Add the top suite to the test runner
|
30
|
+
CPPUNIT_NS::TestRunner runner;
|
31
|
+
runner.addTest( CPPUNIT_NS::TestFactoryRegistry::getRegistry().makeTest() );
|
32
|
+
runner.run( controller );
|
33
|
+
|
34
|
+
// Print test in a compiler compatible format.
|
35
|
+
CPPUNIT_NS::CompilerOutputter outputter( &result, std::cerr );
|
36
|
+
outputter.write();
|
37
|
+
|
38
|
+
return result.wasSuccessful() ? 0 : 1;
|
39
|
+
}
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module TraceVisualization
  # Small wall-clock timing helpers. Every method expects a block and
  # measures how long that block takes to run.
  module Profile

    # Runs the block, printing a timestamped "Start"/"Finish" line for +name+
    # around it. Returns the elapsed time in seconds, rounded to 3 digits.
    def self.time(name)
      start = Time.now
      puts "#{start} Start #{name}"

      yield

      finish = Time.now
      puts "#{finish} Finish #{name}"

      (finish.to_f - start.to_f).round(3)
    end

    # Runs the block and reports "<message><detail>, pt = <seconds> sec".
    #
    # message - text the report line starts with
    # logger  - optional; when given, the line is sent to logger.info,
    #           otherwise it is printed with puts (previously the logger
    #           was accepted but never used)
    # object, method - optional; when object is not nil, the result of
    #           object.send(method) is appended to the message
    #
    # Returns nil.
    def self.processing_time(message, logger = nil, object = nil, method = nil)
      elapsed = pt { yield }

      detail = object != nil ? (object.send(method)) : ''
      line = "#{message}#{detail}, pt = #{elapsed} sec"

      if logger
        logger.info(line)
      else
        puts line
      end

      nil
    end

    # Runs the block and returns the elapsed time in seconds, rounded to
    # 4 digits.
    def self.pt
      start = Time.now

      yield

      finish = Time.now

      (finish.to_f - start.to_f).round(4)
    end
  end
end
|
@@ -1,25 +1,36 @@
|
|
1
1
|
module TraceVisualization
|
2
2
|
module Reorder
|
3
3
|
|
4
|
-
# Assign new
|
5
|
-
# and max
|
4
|
+
# Assign new int_values (ord field) in order to reduce the distance between
|
5
|
+
# min and max int_values. It's necessary to reduce the size of the alphabet.
|
6
|
+
# Return max int_value
|
6
7
|
def self.process(data)
|
7
|
-
sorted = data.sort
|
8
|
-
c = a.value - b.value
|
9
|
-
c == 0 ? 0 : (c < 0 ? -1 : 1)
|
10
|
-
end
|
8
|
+
sorted = data.sort { |a, b| a.int_value <=> b.int_value }
|
11
9
|
|
12
|
-
|
13
|
-
prev = nil
|
10
|
+
termination_chars = []
|
14
11
|
|
12
|
+
idx, prev = 0, nil
|
15
13
|
sorted.each do |item|
|
16
|
-
if prev != item.
|
17
|
-
prev = item.
|
14
|
+
if prev != item.int_value
|
15
|
+
prev = item.int_value
|
18
16
|
idx += 1
|
19
17
|
end
|
20
18
|
|
21
|
-
item.
|
19
|
+
if item.int_value == TraceVisualization::TERMINATION_CHAR.ord
|
20
|
+
termination_chars << item
|
21
|
+
idx -= 1
|
22
|
+
else
|
23
|
+
item.ord = idx
|
24
|
+
end
|
22
25
|
end
|
26
|
+
|
27
|
+
if termination_chars.size > 0
|
28
|
+
# Set maximal value for termination char
|
29
|
+
termination_chars.each { |x| x.ord = idx + 1 }
|
30
|
+
idx += 1
|
31
|
+
end
|
32
|
+
|
33
|
+
idx
|
23
34
|
end
|
24
35
|
end
|
25
36
|
end
|
@@ -10,7 +10,7 @@ module TraceVisualization
|
|
10
10
|
lcp = TraceVisualization::LongestCommonPrefix.effective(str, sa, str.size)
|
11
11
|
bwt = TraceVisualization::BurrowsWheelerTransform.bwt(str, sa, str.length)
|
12
12
|
|
13
|
-
result = psy1_original(lcp, bwt,
|
13
|
+
result = psy1_original(lcp, bwt, p_min, str.length)
|
14
14
|
result = decode_psy1_result(result, sa) if decode_result
|
15
15
|
|
16
16
|
result
|
@@ -27,6 +27,8 @@ module TraceVisualization
|
|
27
27
|
str.each_char { |c| s << c.ord }
|
28
28
|
elsif str.instance_of? Array
|
29
29
|
str.each { |c| s << c.ord }
|
30
|
+
else
|
31
|
+
s = str
|
30
32
|
end
|
31
33
|
|
32
34
|
3.times { s << 0 }
|
@@ -35,6 +37,8 @@ module TraceVisualization
|
|
35
37
|
|
36
38
|
effective_linear(s, suffix_array, n, s.max + 1)
|
37
39
|
|
40
|
+
3.times { s.pop }
|
41
|
+
|
38
42
|
suffix_array[0 ... -3]
|
39
43
|
end
|
40
44
|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'trace_visualization/assert'
|
2
|
+
|
1
3
|
module TraceVisualization
|
2
4
|
module Utils
|
3
5
|
|
@@ -5,8 +7,11 @@ module TraceVisualization
|
|
5
7
|
lp.hash + rp.hash
|
6
8
|
end
|
7
9
|
|
10
|
+
|
8
11
|
# Get the start position of lines
|
9
12
|
def self.lines_pos(str)
|
13
|
+
TraceVisualization.assert_instance_of(str, String)
|
14
|
+
|
10
15
|
lines_pos = [0]
|
11
16
|
pos = -1
|
12
17
|
|
@@ -43,5 +48,38 @@ module TraceVisualization
|
|
43
48
|
rs_by_line
|
44
49
|
end
|
45
50
|
|
51
|
+
# Read data from file
# Allowed options
#   file_name   file name
#   n_bytes     the number of bytes to read from the start of the file
#   n_lines     the number of lines to read from the start of the file
#
# If both options - n_bytes and n_lines - are set, it uses n_bytes.
# Without n_bytes and n_lines the whole file is read.
#
# Returns the data as a String, or nil when file_name is not given.
def self.read_file(options)
  file_name = options[:file_name]
  return nil unless file_name

  n_bytes = options[:n_bytes]
  # File.read (unlike Kernel#open) never treats the name as a command.
  return (File.read(file_name, n_bytes) || "") if n_bytes != nil

  n_lines = options[:n_lines]
  lines = []

  # The block form closes the file even when reading raises.
  File.open(file_name) do |file|
    file.each_line do |line|
      # Checked before appending, so n_lines == 0 yields an empty string.
      break if n_lines && lines.size >= n_lines
      lines << line
    end
  end

  lines.join
end
|
83
|
+
|
46
84
|
end # module Utils
|
47
85
|
end # module TraceVisualization
|
@@ -1,6 +1,6 @@
|
|
1
|
-
require '
|
1
|
+
require 'trace_visualization/data/repetition'
|
2
2
|
|
3
|
-
module
|
3
|
+
module TraceVisualization
|
4
4
|
module Visualization
|
5
5
|
module ConsoleColorPrint
|
6
6
|
|
@@ -9,6 +9,8 @@ module Travis
|
|
9
9
|
FNSH = "\033[0m"
|
10
10
|
|
11
11
|
def self.hl(str, repetition)
|
12
|
+
raise "repetition must be instance of TraceVisualization::Data::Repetition" if not repetition.instance_of? TraceVisualization::Data::Repetition
|
13
|
+
|
12
14
|
result = ""
|
13
15
|
prev_position = 0
|
14
16
|
positions = repetition.build_positions
|
data/lib/trace_visualization.rb
CHANGED
@@ -1,10 +1,74 @@
|
|
1
|
-
|
1
|
+
%w(
|
2
|
+
mapping
|
3
|
+
utils
|
4
|
+
version
|
5
|
+
).each { |file| require File.join(File.dirname(__FILE__), 'trace_visualization', file) }
|
6
|
+
|
7
|
+
require 'logger'
|
8
|
+
require 'benchmark'
|
2
9
|
|
3
10
|
module TraceVisualization
|
4
|
-
|
11
|
+
|
5
12
|
# Should be 'greater' of all possible chars in the lexicographical order
|
6
|
-
TERMINATION_CHAR =
|
7
|
-
|
13
|
+
TERMINATION_CHAR = 255.chr
|
14
|
+
|
8
15
|
FORBIDDEN_CHARS = /\n/
|
16
|
+
|
17
|
+
#
|
18
|
+
# options[:str]
|
19
|
+
# options[:file_name]
|
20
|
+
#
|
21
|
+
# Entry point of the library: fills in default options and maps the
# preprocessed file named by options[:file_name]. The remaining pipeline
# steps are currently disabled (kept below between =begin and =end).
#
# Returns whatever the mapping's process call returns.
def self.process(options = {})
  options = set_default_options(options)
  logger = options[:logger]

  # Preprocess
  preprocessed_file = options[:file_name]

  # Read & mapping file
  mapping = TraceVisualization::Mapping.new
  mapping.process { from_preprocessed_file preprocessed_file }

=begin
    logger.info 'start process'

    str = nil
    str_mapped = nil

    Benchmark.bm(14) do |x|
      x.report('read file') { str = options[:str] || TraceVisualization::Utils.read_file(options) }
      x.report('mapping') { str_mapped = TraceVisualization::Mapping.new(str) }
    end

    str_len = str.length
    map_len = str_mapped.length
    logger.info("str.length = #{str_len}, str_mapped.length = #{map_len}, compression = #{((str_len.to_f - map_len) / str_len.to_f).round(2)}%")

    return []

    rs = TraceVisualization::Repetitions.psy1(str_mapped, options[:p_min], true)

    logger.info 'PSY1 finish. build context'

    context = TraceVisualization::Repetitions::Context.new(str_mapped, rs)

    logger.info 'first concat step'

    TraceVisualization::RepetitionsConcatenation.process(rs, 1, context)

    # Approximate
    # Vissss
=end
  #rs
end
|
66
|
+
|
67
|
+
# Returns a new hash holding the default options (:p_min => 3 and a Logger
# writing to STDOUT) overridden by the entries of +options+.
def self.set_default_options(options)
  defaults = {
    :p_min  => 3,
    :logger => Logger.new(STDOUT)
  }

  defaults.merge(options)
end
|
9
73
|
|
10
74
|
end
|