sm-transcript 0.0.4 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- data/README.txt +138 -118
- data/Rakefile +21 -10
- data/bin/sm-transcript +0 -0
- data/lib/sm_transcript/metadata.rb +25 -0
- data/lib/sm_transcript/options.rb +9 -3
- data/lib/sm_transcript/runner.rb +6 -0
- data/lib/sm_transcript/seg_reader.rb +1 -1
- data/lib/sm_transcript/transcript.rb +86 -39
- data/lib/sm_transcript/ttml_reader.rb +116 -0
- data/lib/sm_transcript/word.rb +6 -4
- data/lib/sm_transcript/wrd_reader.rb +5 -4
- data/test/results/18.03-2004-L01.align2.wrd +6441 -0
- data/test/results/8.01-1999-L01.wrd +5182 -0
- data/test/results/801-1stLecture.ttml.xml +757 -0
- data/test/results/801-lect01-4730.xml +757 -0
- data/test/results/801-lect02-4731.xml +886 -0
- data/test/results/801-lect03-4732.xml +818 -0
- data/test/results/801-lect04-4733.xml +831 -0
- data/test/results/801-lect05-4734.xml +879 -0
- data/test/results/801-lect06-4735.xml +822 -0
- data/test/results/801-lect07-4736.xml +893 -0
- data/test/results/801-lect08-4737.xml +809 -0
- data/test/results/801-lect09-4738.xml +807 -0
- data/test/results/Audio-Open-The_New_Deal_for_Education.xml +4301 -0
- data/test/test_metadatareader.rb +8 -3
- data/test/test_options.rb +8 -1
- data/test/test_runner.rb +34 -1
- data/test/test_transcript.rb +109 -12
- data/test/test_ttmlreader.rb +104 -0
- data/test/test_wrdreader.rb +24 -9
- metadata +47 -148
- data/lib/sm_transcript/optparseExample.rb +0 -113
- data/lib/sm_transcript/process_csv_files_to_html.rb +0 -58
- data/lib/sm_transcript/process_seg_files.rb +0 -21
- data/lib/sm_transcript/process_seg_files_to_csv.rb +0 -24
- data/lib/sm_transcript/process_seg_files_to_html.rb +0 -31
- data/lib/sm_transcript/require_relative.rb +0 -14
- data/test/transcripts/GardnerRileyInterview.t1.html +0 -247
- data/test/transcripts/IIHS_Diane_Davis_Nov2009-t1.html +0 -148
- data/test/transcripts/NERCOMP-SpokenMedia4.t1.html +0 -2178
- data/test/transcripts/data.js +0 -24
- data/test/transcripts/vijay_kumar-1.-t1.html +0 -557
- data/test/transcripts/vijay_kumar-1.t1.html +0 -558
- data/test/transcripts/vijay_kumar-t1.html +0 -558
- data/test/transcripts/vijay_kumar-t1.ttml +0 -570
- data/test/transcripts/vijay_kumar.data.js +0 -2
- data/test/transcripts/vijay_kumar.t1.html +0 -557
- data/test/transcripts/wirehair-beetle.data.js +0 -24
data/test/test_metadatareader.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# $Id: test_metadatareader.rb
|
1
|
+
# $Id: test_metadatareader.rb 196 2010-06-11 18:51:18Z pwilkins $
|
2
2
|
# Copyright (c) 2010 Massachusetts Institute of Technology
|
3
3
|
# see LICENSE.txt for license text
|
4
4
|
|
@@ -10,13 +10,13 @@ require_relative '../lib/sm_transcript/metadata_reader'
|
|
10
10
|
|
11
11
|
class TestMetadataReader < Test::Unit::TestCase
|
12
12
|
|
13
|
-
context "Specifying a metadata file the .to_file method" do
|
13
|
+
context "Specifying a .TXT metadata file the .to_file method" do
|
14
14
|
should "return an object (you call that a test?)" do
|
15
15
|
obj = SmTranscript::MetadataReader.from_file("results/wirehair-beetle.txt")
|
16
16
|
assert_not_nil(obj)
|
17
17
|
end
|
18
18
|
|
19
|
-
should "read the
|
19
|
+
should "read the plain text and place it in fields " do
|
20
20
|
md = SmTranscript::MetadataReader.from_file("results/wirehair-beetle.txt")
|
21
21
|
# p md
|
22
22
|
assert_equal 'Nimrod Olson', md.metadata['name'].nil? ? 'empty' : md.metadata['name'].chomp
|
@@ -27,4 +27,9 @@ class TestMetadataReader < Test::Unit::TestCase
|
|
27
27
|
assert_equal "aboriginal_hairstyling.mov", md.metadata['video'].nil? ? 'empty' : md.metadata['video'].chomp
|
28
28
|
end
|
29
29
|
end
|
30
|
+
|
31
|
+
# context "Specifying a XML metadata file the .to_file method" do
|
32
|
+
# should "read the DC elements and place it in fields"
|
33
|
+
# end
|
34
|
+
|
30
35
|
end
|
data/test/test_options.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# $Id: test_options.rb
|
1
|
+
# $Id: test_options.rb 196 2010-06-11 18:51:18Z pwilkins $
|
2
2
|
# Copyright (c) 2010 Massachusetts Institute of Technology
|
3
3
|
# see LICENSE.txt for license text
|
4
4
|
|
@@ -36,6 +36,13 @@ class TestOptions < Test::Unit::TestCase
|
|
36
36
|
assert_equal 'wrd', opts.srctype
|
37
37
|
end
|
38
38
|
|
39
|
+
should "read srctype:ttml, desttype:json" do
|
40
|
+
opts = SmTranscript::Options.new([
|
41
|
+
'--desttype', 'json', '--srctype', 'ttml'])
|
42
|
+
assert_equal 'json', opts.desttype
|
43
|
+
assert_equal 'ttml', opts.srctype
|
44
|
+
end
|
45
|
+
|
39
46
|
should "read srctype:wrd, desttype:datajs" do
|
40
47
|
opts = SmTranscript::Options.new([
|
41
48
|
'--desttype', 'datajs', '--srctype', 'wrd'])
|
data/test/test_runner.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# $Id: test_runner.rb
|
1
|
+
# $Id: test_runner.rb 196 2010-06-11 18:51:18Z pwilkins $
|
2
2
|
# Copyright (c) 2010 Massachusetts Institute of Technology
|
3
3
|
# see LICENSE.txt for license text
|
4
4
|
|
@@ -42,6 +42,39 @@ class TestRunner < Test::Unit::TestCase
|
|
42
42
|
runner.run
|
43
43
|
end
|
44
44
|
|
45
|
+
fname01 = '801-1stLecture.ttml.xml'
|
46
|
+
fname02 = 'Audio-Open-The_New_Deal_for_Education.xml'
|
47
|
+
|
48
|
+
should "return specified ttml srctype, and default values" do
|
49
|
+
runner = SmTranscript::Runner.new(["--srctype", 'xml', '--desttype', 'html'])
|
50
|
+
opts = runner.options
|
51
|
+
assert File.exists?("#{opts.srcdir}/#{fname01}"),
|
52
|
+
"Source file not found: #{opts.srcdir}/#{fname01}"
|
53
|
+
assert_equal './transcripts', opts.destdir
|
54
|
+
assert_equal './results', opts.srcdir
|
55
|
+
assert_equal 'xml', opts.srctype
|
56
|
+
assert_equal 'html', opts.desttype
|
57
|
+
runner.run
|
58
|
+
assert(File.exists?("#{opts.destdir}/#{fname01}-t1.html"),
|
59
|
+
"File not found: #{opts.destdir}/#{fname01}-t1.html")
|
60
|
+
end
|
61
|
+
|
62
|
+
fname03 = '8.01-1999-L01.wrd'
|
63
|
+
|
64
|
+
should "process .wrd file to .ttml, and default values" do
|
65
|
+
runner = SmTranscript::Runner.new(["--srctype", 'wrd', '--desttype', 'ttml'])
|
66
|
+
opts = runner.options
|
67
|
+
assert File.exists?("#{opts.srcdir}/#{fname03}"),
|
68
|
+
"Source file not found: #{opts.srcdir}/#{fname03}"
|
69
|
+
assert_equal './transcripts', opts.destdir
|
70
|
+
assert_equal './results', opts.srcdir
|
71
|
+
assert_equal 'wrd', opts.srctype
|
72
|
+
assert_equal 'ttml', opts.desttype
|
73
|
+
runner.run
|
74
|
+
assert(File.exists?("#{opts.destdir}/#{fname03}-t1.ttml"),
|
75
|
+
"File not found: #{opts.destdir}/#{fname03}-t1.ttml")
|
76
|
+
end
|
77
|
+
|
45
78
|
# I don't know how to test for the "invalid option" error that this test causes.
|
46
79
|
# should "return display usage information and optionally an error msg" do
|
47
80
|
# SmTranscript::Runner.new(["--niblick-mashie"])
|
data/test/test_transcript.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# $Id: test_transcript.rb
|
1
|
+
# $Id: test_transcript.rb 196 2010-06-11 18:51:18Z pwilkins $
|
2
2
|
# Copyright (c) 2010 Massachusetts Institute of Technology
|
3
3
|
# see LICENSE.txt for license text
|
4
4
|
|
@@ -9,10 +9,10 @@ require 'shoulda'
|
|
9
9
|
require_relative '../lib/sm_transcript/transcript'
|
10
10
|
require_relative '../lib/sm_transcript/seg_reader'
|
11
11
|
require_relative '../lib/sm_transcript/wrd_reader'
|
12
|
+
require_relative '../lib/sm_transcript/ttml_reader'
|
12
13
|
require_relative '../lib/sm_transcript/word'
|
13
14
|
|
14
15
|
class TestTranscript < Test::Unit::TestCase
|
15
|
-
# words = []
|
16
16
|
|
17
17
|
# context "write transcript to HTML in default dest dir" do
|
18
18
|
# should "create transcript file in ./transcripts" do
|
@@ -26,37 +26,134 @@ class TestTranscript < Test::Unit::TestCase
|
|
26
26
|
# end
|
27
27
|
# end
|
28
28
|
|
29
|
-
context "write transcript to HTML in default dest dir
|
29
|
+
context "Call SegReader, write transcript to HTML in default dest dir" do
|
30
30
|
should "create transcript file in ./transcripts" do
|
31
31
|
segfile = SmTranscript::SegReader.from_file("results/IIHS_Diane_Davis_Nov2009.seg")
|
32
32
|
t = SmTranscript::Transcript.new(segfile.words)
|
33
|
+
assert_not_nil(t, "unable to create words array from ./results/IIHS_Diane_Davis_Nov2009.seg")
|
34
|
+
assert(t.words.length > 1, "@words array contains single phrase")
|
33
35
|
t.write_html("transcripts/IIHS_Diane_Davis_Nov2009-t1.html")
|
34
36
|
|
35
37
|
# for now just check for the existence of a file
|
36
|
-
assert(File.exists?("transcripts/IIHS_Diane_Davis_Nov2009-t1.html")
|
38
|
+
assert(File.exists?("transcripts/IIHS_Diane_Davis_Nov2009-t1.html"),
|
39
|
+
"File not found: transcripts/IIHS_Diane_Davis_Nov2009-t1.html")
|
37
40
|
end
|
38
41
|
end
|
39
42
|
|
40
|
-
context "
|
43
|
+
context "Call WrdReader, write transcript to HTML in default dest dir" do
|
44
|
+
|
45
|
+
fname03 = 'vijay_kumar.wrd'
|
46
|
+
fext = '-t1.ttml'
|
47
|
+
|
41
48
|
should "create transcript file in ./transcripts" do
|
42
|
-
wrdfile = SmTranscript::WrdReader.from_file("results
|
49
|
+
wrdfile = SmTranscript::WrdReader.from_file("results/#{fname03}")
|
43
50
|
t = SmTranscript::Transcript.new(wrdfile.words)
|
51
|
+
assert_not_nil(t, "unable to create words array from ./results/#{fname03}")
|
52
|
+
assert(t.words.length > 1, "@words array contains single phrase")
|
44
53
|
t.write_html("transcripts/vijay_kumar-t1.html")
|
45
54
|
|
46
55
|
# for now just check for the existence of a file
|
47
|
-
assert(File.exists?("transcripts/vijay_kumar-t1.html")
|
56
|
+
assert(File.exists?("transcripts/vijay_kumar-t1.html"),
|
57
|
+
"File not found: transcripts/vijay_kumar-t1.html")
|
58
|
+
end
|
59
|
+
|
60
|
+
should "correct known phrases" do
|
61
|
+
# <span id='T9'>of MIT</span>
|
62
|
+
str_found = false
|
63
|
+
File.open("transcripts/vijay_kumar-t1.html").each do |ln|
|
64
|
+
if !ln.index("T9'\>of MIT").nil?
|
65
|
+
str_found = true
|
66
|
+
end
|
67
|
+
end
|
68
|
+
# assert(str_found)
|
69
|
+
end
|
70
|
+
|
71
|
+
should "create ttml transcript file for #{fname03}" do
|
72
|
+
wrdfile = SmTranscript::WrdReader.from_file("results/#{fname03}")
|
73
|
+
t = SmTranscript::Transcript.new(wrdfile.words)
|
74
|
+
assert_not_nil(t, "unable to create words array from ./results/#{fname03}")
|
75
|
+
assert(t.words.length > 1, "@words array contains single phrase")
|
76
|
+
t.write_ttml("transcripts/#{fname03}#{fext}")
|
77
|
+
|
78
|
+
# for now just check for the existence of a file
|
79
|
+
assert(File.exists?("transcripts/#{fname03}#{fext}"),
|
80
|
+
"File not found: transcripts/#{fname03}#{fext}")
|
81
|
+
end
|
82
|
+
|
83
|
+
fname04 = '8.01-1999-L01.wrd'
|
84
|
+
should "create tt transcript file for #{fname04}" do
|
85
|
+
wrdfile = SmTranscript::WrdReader.from_file("results/#{fname04}")
|
86
|
+
t = SmTranscript::Transcript.new(wrdfile.words)
|
87
|
+
assert_not_nil(t, "unable to create words array from ./results/#{fname04}")
|
88
|
+
assert(t.words.length > 1, "@words array contains single phrase")
|
89
|
+
t.write_ttml("transcripts/#{fname04}#{fext}")
|
90
|
+
|
91
|
+
# for now just check for the existence of a file
|
92
|
+
assert(File.exists?("transcripts/#{fname04}#{fext}"),
|
93
|
+
"File not found: transcripts/#{fname04}#{fext}")
|
48
94
|
end
|
49
95
|
end
|
50
96
|
|
51
|
-
context "Calling
|
52
|
-
|
53
|
-
|
97
|
+
context "Calling TtmlReader, writing transcript to HTML in default dest dir" do
|
98
|
+
|
99
|
+
fname01 = '801-1stLecture.ttml.xml'
|
100
|
+
fname02 = 'Audio-Open-The_New_Deal_for_Education.xml'
|
101
|
+
fext = '-t1.html'
|
102
|
+
|
103
|
+
should "create #{fname01}-t1.html file in ./transcripts" do
|
104
|
+
wrdfile = SmTranscript::TtmlReader.from_file("results/#{fname01}")
|
105
|
+
t = SmTranscript::Transcript.new(wrdfile.words)
|
106
|
+
assert_not_nil(t, "unable to create words array from ./results/#{fname01}")
|
107
|
+
assert(t.words.length > 1, "@words array contains single phrase")
|
108
|
+
t.write_html("transcripts/#{fname01}#{fext}")
|
109
|
+
|
110
|
+
# for now just check for the existence of a file
|
111
|
+
assert(File.exists?("transcripts/#{fname01}#{fext}"),
|
112
|
+
"File not found: transcripts/#{fname01}#{fext}")
|
113
|
+
end
|
114
|
+
|
115
|
+
should "create #{fname02}-t1.html file in ./transcripts" do
|
116
|
+
wrdfile = SmTranscript::TtmlReader.from_file("results/#{fname02}")
|
54
117
|
t = SmTranscript::Transcript.new(wrdfile.words)
|
55
|
-
t
|
118
|
+
assert_not_nil(t, "unable to create words array from ./results/#{fname02}")
|
119
|
+
assert(t.words.length > 1, "@words array contains single phrase")
|
120
|
+
t.write_html("transcripts/#{fname02}#{fext}")
|
56
121
|
|
57
122
|
# for now just check for the existence of a file
|
58
|
-
assert(File.exists?("transcripts
|
123
|
+
assert(File.exists?("transcripts/#{fname02}#{fext}"),
|
124
|
+
"File not found: transcripts/#{fname02}#{fext}")
|
59
125
|
end
|
60
126
|
end
|
61
127
|
|
128
|
+
|
129
|
+
context "Call TtmlReader, write transcript to TTML in default dest dir" do
|
130
|
+
|
131
|
+
fname01 = '801-1stLecture.ttml.xml'
|
132
|
+
fname02 = '801-lect02-4731.xml'
|
133
|
+
fext = '-t1.ttml'
|
134
|
+
|
135
|
+
should "create #{fname01}#{fext} file in ./transcripts" do
|
136
|
+
wrdfile = SmTranscript::TtmlReader.from_file("results/#{fname01}")
|
137
|
+
t = SmTranscript::Transcript.new(wrdfile.words)
|
138
|
+
assert_not_nil(t, "unable to create words array from ./results/#{fname01}")
|
139
|
+
assert(t.words.length > 1, "@words array contains single phrase")
|
140
|
+
t.write_ttml("transcripts/#{fname01}#{fext}")
|
141
|
+
|
142
|
+
# for now just check for the existence of a file
|
143
|
+
assert(File.exists?("transcripts/#{fname01}#{fext}"),
|
144
|
+
"File not found: transcripts/#{fname01}#{fext}")
|
145
|
+
end
|
146
|
+
|
147
|
+
should "create #{fname02}-t1.html file in ./transcripts" do
|
148
|
+
wrdfile = SmTranscript::TtmlReader.from_file("results/#{fname02}")
|
149
|
+
t = SmTranscript::Transcript.new(wrdfile.words)
|
150
|
+
assert_not_nil(t, "unable to create words array from ./results/#{fname02}")
|
151
|
+
assert(t.words.length > 1, "@words array contains single phrase")
|
152
|
+
t.write_ttml("transcripts/#{fname02}#{fext}")
|
153
|
+
|
154
|
+
# for now just check for the existence of a file
|
155
|
+
assert(File.exists?("transcripts/#{fname02}#{fext}"),
|
156
|
+
"File not found: transcripts/#{fname02}#{fext}")
|
157
|
+
end
|
158
|
+
end
|
62
159
|
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
# $Id $
|
2
|
+
# Copyright (c) 2010 Massachusetts Institute of Technology
|
3
|
+
# see LICENSE.txt for license text
|
4
|
+
|
5
|
+
require 'rubygems'
|
6
|
+
require 'extensions/kernel'
|
7
|
+
require 'test/unit'
|
8
|
+
require 'shoulda'
|
9
|
+
require_relative '../lib/sm_transcript/ttml_reader'
|
10
|
+
|
11
|
+
class TestTtmlReader < Test::Unit::TestCase
|
12
|
+
fname01 = '801-1stLecture.ttml.xml'
|
13
|
+
fname02 = 'Audio-Open-The_New_Deal_for_Education.xml'
|
14
|
+
fname03 = '801-lect01-4730.xml'
|
15
|
+
fname04 = '801-lect02-4731.xml'
|
16
|
+
|
17
|
+
context "Specifying a ttml source file" do
|
18
|
+
should "verify that the file can be opened" do
|
19
|
+
f = SmTranscript::TtmlReader.from_file("results/#{fname01}")
|
20
|
+
assert_not_nil(f)
|
21
|
+
end
|
22
|
+
# should "report if the file can't be found" do
|
23
|
+
# wrdfile = SmTranscript::TtmlReader.from_file("results/this_file_doesnt_exist")
|
24
|
+
# assert_not_nil(wrdfile)
|
25
|
+
# end
|
26
|
+
end
|
27
|
+
|
28
|
+
context "processing a TTML source file" do
|
29
|
+
should "create an Array of Word objects from #{fname01}" do
|
30
|
+
f = SmTranscript::TtmlReader.from_file("results/#{fname01}")
|
31
|
+
|
32
|
+
assert_not_nil(f, "unable to create words array from ./results/#{fname01}")
|
33
|
+
assert_equal 766, f.words[0].start_time
|
34
|
+
assert_equal 2033, f.words[0].end_time
|
35
|
+
assert_equal 1267, f.words[0].duration
|
36
|
+
assert_equal "I'm Walter Lewin.", f.words[0].word
|
37
|
+
end
|
38
|
+
|
39
|
+
should "create an Array of Word objects from #{fname02}" do
|
40
|
+
f = SmTranscript::TtmlReader.from_file("results/#{fname02}")
|
41
|
+
|
42
|
+
assert_not_nil(f, "unable to create words array from ./results/#{fname02}")
|
43
|
+
last_index = f.words.length - 1
|
44
|
+
assert_equal 6580, f.words[0].start_time
|
45
|
+
assert_equal 8950, f.words[0].end_time
|
46
|
+
assert_equal 2370, f.words[0].duration
|
47
|
+
assert_equal "Let me welcome all of you on", f.words[0].word
|
48
|
+
|
49
|
+
assert_equal 8950, f.words[1].start_time
|
50
|
+
assert_equal 11320, f.words[1].end_time
|
51
|
+
assert_equal 2370, f.words[1].duration
|
52
|
+
assert_equal " behalf of MIT, on behalf of the", f.words[1].word
|
53
|
+
|
54
|
+
assert_equal 3027795, f.words[last_index].start_time
|
55
|
+
assert_equal 3033750, f.words[last_index].end_time
|
56
|
+
assert_equal 5955, f.words[last_index].duration
|
57
|
+
assert_equal " noticed, but thanks a lot.", f.words[last_index].word
|
58
|
+
end
|
59
|
+
|
60
|
+
should "create an Array of Word objects from #{fname03}" do
|
61
|
+
f = SmTranscript::TtmlReader.from_file("results/#{fname03}")
|
62
|
+
|
63
|
+
assert_not_nil(f, "unable to create words array from ./results/#{fname03}")
|
64
|
+
last_index = f.words.length - 1
|
65
|
+
assert( (last_index + 1) == 1188, "Unexpected number of word entries")
|
66
|
+
assert_equal 766, f.words[0].start_time
|
67
|
+
assert_equal 2033, f.words[0].end_time
|
68
|
+
assert_equal 1267, f.words[0].duration
|
69
|
+
assert_equal "I'm Walter Lewin.", f.words[0].word
|
70
|
+
|
71
|
+
assert_equal 2033, f.words[1].start_time
|
72
|
+
assert_equal 3399, f.words[1].end_time
|
73
|
+
assert_equal 1366, f.words[1].duration
|
74
|
+
assert_equal "I will be your lecturer", f.words[1].word
|
75
|
+
|
76
|
+
assert_equal 2286666, f.words[last_index].start_time
|
77
|
+
assert_equal 2288266, f.words[last_index].end_time
|
78
|
+
assert_equal 1600, f.words[last_index].duration
|
79
|
+
assert_equal "See you Friday.", f.words[last_index].word
|
80
|
+
end
|
81
|
+
|
82
|
+
should "create an Array of Word objects from #{fname04}" do
|
83
|
+
f = SmTranscript::TtmlReader.from_file("results/#{fname04}")
|
84
|
+
|
85
|
+
assert_not_nil(f, "unable to create words array from ./results/#{fname04}")
|
86
|
+
last_index = f.words.length - 1
|
87
|
+
assert_equal 1033, f.words[0].start_time
|
88
|
+
assert_equal 2516, f.words[0].end_time
|
89
|
+
assert_equal 1483, f.words[0].duration
|
90
|
+
assert_equal "We will discuss velocities", f.words[0].word
|
91
|
+
|
92
|
+
assert_equal 2516, f.words[1].start_time
|
93
|
+
assert_equal 3999, f.words[1].end_time
|
94
|
+
assert_equal 1483, f.words[1].duration
|
95
|
+
assert_equal "and acceleration.", f.words[1].word
|
96
|
+
|
97
|
+
assert_equal 3061566, f.words[last_index].start_time
|
98
|
+
assert_equal 3062566, f.words[last_index].end_time
|
99
|
+
assert_equal 1000, f.words[last_index].duration
|
100
|
+
assert_equal "See you Monday.", f.words[last_index].word
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
end
|
data/test/test_wrdreader.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# $Id: test_wrdreader.rb
|
1
|
+
# $Id: test_wrdreader.rb 196 2010-06-11 18:51:18Z pwilkins $
|
2
2
|
# Copyright (c) 2010 Massachusetts Institute of Technology
|
3
3
|
# see LICENSE.txt for license text
|
4
4
|
|
@@ -22,24 +22,39 @@ class TestWrdReader < Test::Unit::TestCase
|
|
22
22
|
end
|
23
23
|
|
24
24
|
context "processing a WRD source file" do
|
25
|
-
should "create
|
25
|
+
should "create Array of Word objects from vijay_kumar.wrd (lf line endings)" do
|
26
26
|
wrdfile = SmTranscript::WrdReader.from_file("results/vijay_kumar.wrd")
|
27
27
|
|
28
|
-
assert_not_nil(wrdfile)
|
28
|
+
assert_not_nil(wrdfile, "unable to create words array from ./results/vijay_kumar.wrd")
|
29
29
|
assert_equal "5660", wrdfile.words[0].start_time
|
30
30
|
assert_equal "6627", wrdfile.words[0].end_time
|
31
31
|
assert_equal "okay", wrdfile.words[0].word
|
32
32
|
end
|
33
|
-
end
|
34
33
|
|
35
|
-
|
36
|
-
should "create an Array of Word objects from GardnerRileyInterview.wrd (crlf line endings)" do
|
34
|
+
should "create Array of Word objects from GardnerRileyInterview.wrd (crlf line endings)" do
|
37
35
|
wrdfile = SmTranscript::WrdReader.from_file("results/GardnerRileyInterview.wrd")
|
38
|
-
|
39
|
-
assert_not_nil(wrdfile)
|
40
|
-
assert_equal "1630", wrdfile.words[0].start_time
|
36
|
+
|
37
|
+
assert_not_nil(wrdfile, "unable to create words array from ./results/GardnerRileyInterview.wrd")
|
38
|
+
assert_equal "1630", wrdfile.words[0].start_time # first word
|
41
39
|
assert_equal "1815", wrdfile.words[0].end_time
|
42
40
|
assert_equal "this", wrdfile.words[0].word
|
41
|
+
assert_equal "321696", wrdfile.words[539].start_time # last word
|
42
|
+
assert_equal "321785", wrdfile.words[539].end_time
|
43
|
+
assert_equal "you", wrdfile.words[539].word
|
44
|
+
assert_equal 540, wrdfile.words.length
|
45
|
+
end
|
46
|
+
|
47
|
+
should "create Array of Word objects from 8.01-1999-L01.wrd (lf line endings)" do
|
48
|
+
wrdfile = SmTranscript::WrdReader.from_file("results/8.01-1999-L01.wrd")
|
49
|
+
|
50
|
+
assert_not_nil(wrdfile, "unable to create words array from ./results/8.01-1999-L01.wrd")
|
51
|
+
assert_equal "2010", wrdfile.words[0].start_time # first word
|
52
|
+
assert_equal "2125", wrdfile.words[0].end_time
|
53
|
+
assert_equal "I'm", wrdfile.words[0].word
|
54
|
+
assert_equal "2288600", wrdfile.words[5181].start_time # last word
|
55
|
+
assert_equal "2290545", wrdfile.words[5181].end_time
|
56
|
+
assert_equal "<noise>", wrdfile.words[5181].word
|
57
|
+
assert_equal 5182, wrdfile.words.length
|
43
58
|
end
|
44
59
|
end
|
45
60
|
|
metadata
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sm-transcript
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
+
hash: 19
|
4
5
|
prerelease: false
|
5
6
|
segments:
|
6
7
|
- 0
|
7
8
|
- 0
|
8
|
-
-
|
9
|
-
version: 0.0.
|
9
|
+
- 6
|
10
|
+
version: 0.0.6
|
10
11
|
platform: ruby
|
11
12
|
authors:
|
12
13
|
- Peter Wilkins
|
@@ -14,133 +15,28 @@ autorequire:
|
|
14
15
|
bindir: bin
|
15
16
|
cert_chain: []
|
16
17
|
|
17
|
-
date: 2010-
|
18
|
+
date: 2010-07-19 00:00:00 -04:00
|
18
19
|
default_executable:
|
19
20
|
dependencies: []
|
20
21
|
|
21
|
-
description: "$Id: README.txt
|
22
|
+
description: "$Id: README.txt 196 2010-06-11 18:51:18Z pwilkins $\n\n\
|
22
23
|
sm-transcript reads results of SLS processing and produces transcripts for\n\
|
23
24
|
the SpokenMedia browser. For each file in the source folder whose extension \n\
|
24
25
|
matches the source type, a file of destination type is created in the \n\
|
25
|
-
destination folder. All of these parameters have default values
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
\
|
31
|
-
\
|
32
|
-
\
|
33
|
-
\t\n\
|
34
|
-
|
35
|
-
\
|
36
|
-
\
|
37
|
-
\
|
38
|
-
|
39
|
-
\t\n\
|
40
|
-
\tsudo gem install [--verbose] sm-transcript\n\
|
41
|
-
\t\n\
|
42
|
-
\tThis command downloads the most recent version of the gem from rubygems.org\n\
|
43
|
-
\tand makes it active. Previous versions of the gem remain installed, but \n\
|
44
|
-
\tare deactivated.\n\
|
45
|
-
\t\n\
|
46
|
-
\tYou must use \"sudo\" to properly install the gem. If you execute \"gem \n\
|
47
|
-
\tinstall\" (omitting the \"sudo\") the gem is installed in your home gem \n\
|
48
|
-
\trepository and it isn't in your path without additional configuration.\n\
|
49
|
-
\t\n\
|
50
|
-
\tNote: You need sudo privileges to run the command as written. If you \n\
|
51
|
-
\tcan't sudo, then you can install it locally and will need some additional\n\
|
52
|
-
\tconfiguration. Contact me (or your local Ruby wizard) for assistance. \n\
|
53
|
-
\t \n\
|
54
|
-
\tThe executable is now in your path.\n\
|
55
|
-
\t\n\
|
56
|
-
\tYou can cleanly uninstall the gem with this command:\n\
|
57
|
-
\t\n\
|
58
|
-
\tsudo gem uninstall sm-transcript\t\n\
|
59
|
-
\t\n\
|
60
|
-
\tIf you have access to our svn repository, you are welcome to check out the \n\
|
61
|
-
\tcode. Be warned that the trunk tip is not necessarily stable. It changes \n\
|
62
|
-
\tfrequently as enhancements (and bug fixes) are added. (note that the\n\
|
63
|
-
\t'smb_transcript' in the command line below is not a typo. )\n\n\
|
64
|
-
\tsvn co svn+ssh://svn.mit.edu/oeit-tsa/SMB/smb_transcript/trunk sm_transcript\n\
|
65
|
-
\t\n\
|
66
|
-
\tbuild the gem by running this command from the directory you installed the \n\
|
67
|
-
\tsource.\n\
|
68
|
-
\t\n\
|
69
|
-
\trake gem\n\
|
70
|
-
\t\n\
|
71
|
-
\tThe gem will be built and put in ./pkg You can now use the gem \n\
|
72
|
-
\tinstallation instructions above.\n\
|
73
|
-
\t\n\n\
|
74
|
-
Using the App:\n\
|
75
|
-
\tRun with no command line parameters, the app reads *.wrd files out of \n\
|
76
|
-
\t./results and writes *t1.html files to ./transcripts. These directories\n\
|
77
|
-
\tare relative to where sm_transcript is called.\n\
|
78
|
-
\t\n\
|
79
|
-
\tNote: destination files are overwritten without a warning prompt. If you \n\
|
80
|
-
\twant to preserve an existing output file, rename it before running the app\n\
|
81
|
-
\tagain.\n\
|
82
|
-
\t\n\
|
83
|
-
\tFor example, run the app by navigating to the bin folder and running \n\n\
|
84
|
-
\t\tprojects/sm_transcript/bin felix$ sm_transcript\n\
|
85
|
-
\t\n\
|
86
|
-
\tThis command run from this folder will read *.wrd files from bin/results\n\
|
87
|
-
\tand write *-t1.html to bin/transcripts.\n\
|
88
|
-
\t\n\
|
89
|
-
\tUsage: sm_transcript [options] \n --srcdir PATH Read files from this folder (Default: ./results)\n --destdir PATH Write files to this folder (Default: ./transcripts)\n --srctype wrd | seg Kind of file to process (Default: wrd)\n --desttype html | ttml | datajs Kind of file to output (Default: html)\n -h, --help Show this message\t\n\n\n\
|
90
|
-
Troubleshooting:\n\
|
91
|
-
\tsm-transcript requires additional gems to operate. The RubyGem \n\
|
92
|
-
\tinstallation should install dependencies automatically, but when it \n\
|
93
|
-
\tdoesn't, you get an error that includes \n\
|
94
|
-
\t\n\
|
95
|
-
\t... no such file to load -- builder (LoadError)\n\
|
96
|
-
\t\n\
|
97
|
-
\tin the first few lines when you run sm-transcript, the problem is a \n\
|
98
|
-
\tmissing dependent gem. (the error above indicates that the Builder \n\
|
99
|
-
\tgem is missing.) Try installing the missing gem. For the error above,\n\
|
100
|
-
\tcommand looks like this:\n\
|
101
|
-
\t\n\
|
102
|
-
\tsudo gem install builder\n\
|
103
|
-
\t\n\
|
104
|
-
\tSee \"Required Gems\" below for more information.\n\
|
105
|
-
\t\n\
|
106
|
-
\t\t\n\
|
107
|
-
Upgrading:\n\
|
108
|
-
\tYou can easily upgrade by simply executing the same command you used to \n\
|
109
|
-
\tinstall the gem. Running install again will add the newer version and make\n\
|
110
|
-
\tit active. By default the most recent version is used, but older versions\n\
|
111
|
-
\tare still available, simply inactive.\n\
|
112
|
-
\t\n\
|
113
|
-
\tIf you are using svn, you should already know what to do.\n\
|
114
|
-
\t\n\
|
115
|
-
\t\n\
|
116
|
-
Required Gems:\n\
|
117
|
-
\tbuilder - create structured data, such as XML\n\
|
118
|
-
\textensions - added for the 'require_relative' command. (To get this\n\
|
119
|
-
\t command in Ruby 1.8 you need to install this gem, for Ruby 1.9\n\
|
120
|
-
\t the command is already part of the core.)\n\
|
121
|
-
\thtmlentities - html parsing\n\
|
122
|
-
\tjson - create JSON structured data\n\
|
123
|
-
\toptparse - option parsing of command line\n\
|
124
|
-
\tostruct - open data structures\n\
|
125
|
-
\tppcommand - pp is a pretty printer. It is used only for debugging\n\
|
126
|
-
\trake - make for Ruby\n\
|
127
|
-
\trubygems - support for gems (shouldn't be needed for Ruby 1.9)\n\
|
128
|
-
\tshoulda - enhancement for Test::Unit\n\
|
129
|
-
\t\t\n\
|
130
|
-
\tThis command installs gems on OSX and Linux:\n\
|
131
|
-
\tfelix$ sudo gem install <gem name>\n\
|
132
|
-
\t\n\
|
133
|
-
Unit Tests:\n\
|
134
|
-
\tYou may run all unit tests by navigating to the test folder and running \n\
|
135
|
-
\trake with no parameters (the default rake task runs all tests):\n\n\
|
136
|
-
\tprojects/sm_transcript/test felix$ rake \n\n\n\
|
137
|
-
Release Notes:\n\
|
138
|
-
\tInitial Version - runs under Ruby 1.8. \n\n\
|
139
|
-
To Do:\n\
|
140
|
-
\tupdate code to run under Ruby 1.9\n\n\
|
141
|
-
\tMake this a rubygem, making it available from an OEIT server, rather than\n\
|
142
|
-
\tfrom a public gem repository like RubyForge.\n\
|
143
|
-
\t"
|
26
|
+
destination folder. All of these parameters have default values.\n\n\
|
27
|
+
Note: Examples of the commands you enter in the terminal are for *nix. The\n\
|
28
|
+
command prompt in the examples is:\n\n\
|
29
|
+
felix$ <command line>\n\n\
|
30
|
+
If you are a Windows user, make the usual adjustments.\n\n\
|
31
|
+
Requirements:\n sm-transcript is written in Ruby and packaged as a RubyGem. Since Ruby is\n not a compiled language, you will need to have Ruby installed on your \n machine to run sm-transcript. You can determine if Ruby is installed by \n typing \"ruby -v\" at a terminal prompt. It should return the version of \n Ruby that is installed. If Ruby is not installed on your machine, contact\n me (or your local Ruby wizard) for assistance.\n \n\
|
32
|
+
Installation:\n You can get sm-transcript as either a RubyGem or as source from svn.\n \n The preferred way to install this package is as a Rubygem. You can \n download and install the gem with this command: \n \n felix$ sudo gem install [--verbose] sm-transcript\n \n This command downloads the most recent version of the gem from rubygems.org\n and makes it active. Previous versions of the gem remain installed, but \n are deactivated.\n \n You must use \"sudo\" to properly install the gem. If you execute \"gem \n install\" (omitting the \"sudo\") the gem is installed in your home gem \n repository and it isn't in your path without additional configuration.\n \n Note: You need sudo privileges to run the command as written. If you \n can't sudo, then you can install it locally and will need some additional\n configuration. Contact me (or your local Ruby wizard) for assistance. \n \n The executable is now in your path.\n \n You can cleanly uninstall the gem with this command:\n \n felix$ sudo gem uninstall sm-transcript \n \n If you have access to our svn repository, you are welcome to check out the \n code. Be warned that the trunk tip is not necessarily stable. It changes \n frequently as enhancements (and bug fixes) are added. (note that the\n 'smb_transcript' in the command line below is not a typo.)\n\n svn co svn+ssh://svn.mit.edu/oeit-tsa/SMB/smb_transcript/trunk sm_transcript\n \n build the gem by running this command from the directory you installed the \n source. This is what it looks like on my machine:\n \n felix$ rake gem\n \n The gem will be built and put in ./pkg You can now use the gem \n installation instructions above.\n \n\n\
|
33
|
+
Using the App:\n Run with no command line parameters, the app reads *.wrd files out of \n ./results and writes *t1.html files to ./transcripts. These directories\n are relative to where sm_transcript is called.\n \n Note: destination files are overwritten without a warning prompt. If you \n want to preserve an existing output file, rename it before running the app\n again.\n \n For example, run the app by navigating to the bin folder and enter \n\n projects/sm_transcript/bin felix$ sm_transcript\n \n This command run from this folder will read *.wrd files from bin/results\n and write *-t1.html to bin/transcripts.\n \n Usage: sm_transcript [options] \n --srcdir PATH Read files from this folder (Default: ./results)\n --destdir PATH Write files to this folder (Default: ./transcripts)\n --srctype wrd | seg | txt | ttml Kind of file to process (Default: wrd)\n --desttype html | ttml | datajs | json Kind of file to output (Default: html)\n -h, --help Show this message \n\n\n\
|
34
|
+
Troubleshooting:\n sm-transcript requires additional gems to operate. The RubyGem \n installation should install dependencies automatically, but when it \n doesn't, you get an error that includes \n \n ... no such file to load -- builder (LoadError)\n \n in the first few lines when you run sm-transcript, the problem is a \n missing dependent gem. (the error above indicates that the Builder \n gem is missing.) Try installing the missing gem. For the error above,\n the command looks like this on my computer:\n \n felix$ sudo gem install builder\n \n See \"Required Gems\" below for more information.\n \n \n A warning message such as:\n \n \"WARNING: Nokogiri was built against LibXML version 2.7.6, \n but has dynamically loaded 2.7.7\"\n \n may be safely ignored.\n \n \n\
|
35
|
+
Upgrading:\n You can easily upgrade by simply executing the same command you used to \n install the gem. Running install again will add the newer version and make\n it active. By default the most recent version is used, but older versions\n are still available, simply inactive.\n \n If you are using svn, you should already know what to do.\n \n \n\
|
36
|
+
Required Gems:\n builder - create structured data, such as XML\n extensions - added for the 'require_relative' command. (To get this\n command in Ruby 1.8 you need to install this gem, for Ruby 1.9\n the command is already part of the core.)\n htmlentities - html parsing\n json - create JSON structured data\n optparse - option parsing of command line\n ostruct - open data structures\n ppcommand - pp is a pretty printer. It is used only for debugging\n rake - make for Ruby\n rubygems - support for gems (shouldn't be needed for Ruby 1.9)\n shoulda - enhancement for Test::Unit\n \n This command installs gems on OSX and Linux:\n felix$ sudo gem install <gem name>\n \n\
|
37
|
+
Unit Tests:\n You may run all unit tests by navigating to the test folder and running \n rake with no parameters (the default rake task runs all tests). On my\n computer, it looks like this:\n\n projects/sm_transcript/test felix$ rake \n\n\n\
|
38
|
+
Release Notes:\n Initial Version - runs under Ruby 1.8.x. \n version 0.0.4 - fixes bug when processing .WRD files with CRLF line\n endings.\n version 0.0.5 - added srctype of ttml and desttype of json, fixed bug\n where beginning time of word was actually for previous word.\n\n\
|
39
|
+
To Do:\n specify individual files for processing rather than folders\n update code to run under Ruby 1.9\n\n\n "
|
144
40
|
email: pwilkins@mit.edu
|
145
41
|
executables:
|
146
42
|
- sm-transcript
|
@@ -153,47 +49,45 @@ files:
|
|
153
49
|
- lib/sm_transcript/metadata.rb
|
154
50
|
- lib/sm_transcript/metadata_reader.rb
|
155
51
|
- lib/sm_transcript/options.rb
|
156
|
-
- lib/sm_transcript/optparseExample.rb
|
157
|
-
- lib/sm_transcript/process_csv_files_to_html.rb
|
158
|
-
- lib/sm_transcript/process_seg_files.rb
|
159
|
-
- lib/sm_transcript/process_seg_files_to_csv.rb
|
160
|
-
- lib/sm_transcript/process_seg_files_to_html.rb
|
161
|
-
- lib/sm_transcript/require_relative.rb
|
162
52
|
- lib/sm_transcript/runner.rb
|
163
53
|
- lib/sm_transcript/seg_reader.rb
|
164
54
|
- lib/sm_transcript/transcript.rb
|
55
|
+
- lib/sm_transcript/ttml_reader.rb
|
165
56
|
- lib/sm_transcript/word.rb
|
166
57
|
- lib/sm_transcript/wrd_reader.rb
|
167
58
|
- bin/sm-transcript
|
168
59
|
- bin/results/PLACEHOLDER.txt
|
169
60
|
- bin/transcripts/PLACEHOLDER.txt
|
170
61
|
- test/Rakefile
|
171
|
-
- test/results/GardnerRileyInterview.wrd
|
172
|
-
- test/results/IIHS_Diane_Davis_Nov2009.seg
|
173
|
-
- test/results/NERCOMP-SpokenMedia4.wrd
|
174
|
-
- test/results/PLACEHOLDER.txt
|
175
|
-
- test/results/PLACEHOLDER.txt.ignore
|
176
|
-
- test/results/vijay_kumar.wrd
|
177
|
-
- test/results/wirehair-beetle.txt
|
178
62
|
- test/test_metadata.rb
|
179
63
|
- test/test_metadatareader.rb
|
180
64
|
- test/test_options.rb
|
181
65
|
- test/test_runner.rb
|
182
66
|
- test/test_segreader.rb
|
183
67
|
- test/test_transcript.rb
|
68
|
+
- test/test_ttmlreader.rb
|
184
69
|
- test/test_wrdreader.rb
|
185
|
-
- test/
|
186
|
-
- test/
|
187
|
-
- test/
|
188
|
-
- test/
|
70
|
+
- test/results/18.03-2004-L01.align2.wrd
|
71
|
+
- test/results/8.01-1999-L01.wrd
|
72
|
+
- test/results/801-1stLecture.ttml.xml
|
73
|
+
- test/results/801-lect01-4730.xml
|
74
|
+
- test/results/801-lect02-4731.xml
|
75
|
+
- test/results/801-lect03-4732.xml
|
76
|
+
- test/results/801-lect04-4733.xml
|
77
|
+
- test/results/801-lect05-4734.xml
|
78
|
+
- test/results/801-lect06-4735.xml
|
79
|
+
- test/results/801-lect07-4736.xml
|
80
|
+
- test/results/801-lect08-4737.xml
|
81
|
+
- test/results/801-lect09-4738.xml
|
82
|
+
- test/results/Audio-Open-The_New_Deal_for_Education.xml
|
83
|
+
- test/results/GardnerRileyInterview.wrd
|
84
|
+
- test/results/IIHS_Diane_Davis_Nov2009.seg
|
85
|
+
- test/results/NERCOMP-SpokenMedia4.wrd
|
86
|
+
- test/results/PLACEHOLDER.txt
|
87
|
+
- test/results/PLACEHOLDER.txt.ignore
|
88
|
+
- test/results/vijay_kumar.wrd
|
89
|
+
- test/results/wirehair-beetle.txt
|
189
90
|
- test/transcripts/PLACEHOLDER.txt
|
190
|
-
- test/transcripts/vijay_kumar-1.-t1.html
|
191
|
-
- test/transcripts/vijay_kumar-1.t1.html
|
192
|
-
- test/transcripts/vijay_kumar-t1.html
|
193
|
-
- test/transcripts/vijay_kumar-t1.ttml
|
194
|
-
- test/transcripts/vijay_kumar.data.js
|
195
|
-
- test/transcripts/vijay_kumar.t1.html
|
196
|
-
- test/transcripts/wirehair-beetle.data.js
|
197
91
|
- README.txt
|
198
92
|
- LICENSE.txt
|
199
93
|
- Rakefile
|
@@ -207,24 +101,28 @@ rdoc_options: []
|
|
207
101
|
require_paths:
|
208
102
|
- lib
|
209
103
|
required_ruby_version: !ruby/object:Gem::Requirement
|
104
|
+
none: false
|
210
105
|
requirements:
|
211
106
|
- - ">="
|
212
107
|
- !ruby/object:Gem::Version
|
108
|
+
hash: 31
|
213
109
|
segments:
|
214
110
|
- 1
|
215
111
|
- 8
|
216
112
|
version: "1.8"
|
217
113
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
114
|
+
none: false
|
218
115
|
requirements:
|
219
116
|
- - ">="
|
220
117
|
- !ruby/object:Gem::Version
|
118
|
+
hash: 3
|
221
119
|
segments:
|
222
120
|
- 0
|
223
121
|
version: "0"
|
224
122
|
requirements:
|
225
123
|
- TBD
|
226
124
|
rubyforge_project:
|
227
|
-
rubygems_version: 1.3.
|
125
|
+
rubygems_version: 1.3.7
|
228
126
|
signing_key:
|
229
127
|
specification_version: 3
|
230
128
|
summary: Convert word lists to transcripts
|
@@ -235,4 +133,5 @@ test_files:
|
|
235
133
|
- test/test_runner.rb
|
236
134
|
- test/test_segreader.rb
|
237
135
|
- test/test_transcript.rb
|
136
|
+
- test/test_ttmlreader.rb
|
238
137
|
- test/test_wrdreader.rb
|