sm-transcript 0.0.4 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. data/README.txt +138 -118
  2. data/Rakefile +21 -10
  3. data/bin/sm-transcript +0 -0
  4. data/lib/sm_transcript/metadata.rb +25 -0
  5. data/lib/sm_transcript/options.rb +9 -3
  6. data/lib/sm_transcript/runner.rb +6 -0
  7. data/lib/sm_transcript/seg_reader.rb +1 -1
  8. data/lib/sm_transcript/transcript.rb +86 -39
  9. data/lib/sm_transcript/ttml_reader.rb +116 -0
  10. data/lib/sm_transcript/word.rb +6 -4
  11. data/lib/sm_transcript/wrd_reader.rb +5 -4
  12. data/test/results/18.03-2004-L01.align2.wrd +6441 -0
  13. data/test/results/8.01-1999-L01.wrd +5182 -0
  14. data/test/results/801-1stLecture.ttml.xml +757 -0
  15. data/test/results/801-lect01-4730.xml +757 -0
  16. data/test/results/801-lect02-4731.xml +886 -0
  17. data/test/results/801-lect03-4732.xml +818 -0
  18. data/test/results/801-lect04-4733.xml +831 -0
  19. data/test/results/801-lect05-4734.xml +879 -0
  20. data/test/results/801-lect06-4735.xml +822 -0
  21. data/test/results/801-lect07-4736.xml +893 -0
  22. data/test/results/801-lect08-4737.xml +809 -0
  23. data/test/results/801-lect09-4738.xml +807 -0
  24. data/test/results/Audio-Open-The_New_Deal_for_Education.xml +4301 -0
  25. data/test/test_metadatareader.rb +8 -3
  26. data/test/test_options.rb +8 -1
  27. data/test/test_runner.rb +34 -1
  28. data/test/test_transcript.rb +109 -12
  29. data/test/test_ttmlreader.rb +104 -0
  30. data/test/test_wrdreader.rb +24 -9
  31. metadata +47 -148
  32. data/lib/sm_transcript/optparseExample.rb +0 -113
  33. data/lib/sm_transcript/process_csv_files_to_html.rb +0 -58
  34. data/lib/sm_transcript/process_seg_files.rb +0 -21
  35. data/lib/sm_transcript/process_seg_files_to_csv.rb +0 -24
  36. data/lib/sm_transcript/process_seg_files_to_html.rb +0 -31
  37. data/lib/sm_transcript/require_relative.rb +0 -14
  38. data/test/transcripts/GardnerRileyInterview.t1.html +0 -247
  39. data/test/transcripts/IIHS_Diane_Davis_Nov2009-t1.html +0 -148
  40. data/test/transcripts/NERCOMP-SpokenMedia4.t1.html +0 -2178
  41. data/test/transcripts/data.js +0 -24
  42. data/test/transcripts/vijay_kumar-1.-t1.html +0 -557
  43. data/test/transcripts/vijay_kumar-1.t1.html +0 -558
  44. data/test/transcripts/vijay_kumar-t1.html +0 -558
  45. data/test/transcripts/vijay_kumar-t1.ttml +0 -570
  46. data/test/transcripts/vijay_kumar.data.js +0 -2
  47. data/test/transcripts/vijay_kumar.t1.html +0 -557
  48. data/test/transcripts/wirehair-beetle.data.js +0 -24
@@ -1,4 +1,4 @@
1
- # $Id: test_metadatareader.rb 192 2010-03-27 01:24:26Z pwilkins $
1
+ # $Id: test_metadatareader.rb 196 2010-06-11 18:51:18Z pwilkins $
2
2
  # Copyright (c) 2010 Massachusetts Institute of Technology
3
3
  # see LICENSE.txt for license text
4
4
 
@@ -10,13 +10,13 @@ require_relative '../lib/sm_transcript/metadata_reader'
10
10
 
11
11
  class TestMetadataReader < Test::Unit::TestCase
12
12
 
13
- context "Specifying a metadata file the .to_file method" do
13
+ context "Specifying a .TXT metadata file the .to_file method" do
14
14
  should "return an object (you call that a test?)" do
15
15
  obj = SmTranscript::MetadataReader.from_file("results/wirehair-beetle.txt")
16
16
  assert_not_nil(obj)
17
17
  end
18
18
 
19
- should "read the content and place it in fields " do
19
+ should "read the plain text and place it in fields " do
20
20
  md = SmTranscript::MetadataReader.from_file("results/wirehair-beetle.txt")
21
21
  # p md
22
22
  assert_equal 'Nimrod Olson', md.metadata['name'].nil? ? 'empty' : md.metadata['name'].chomp
@@ -27,4 +27,9 @@ class TestMetadataReader < Test::Unit::TestCase
27
27
  assert_equal "aboriginal_hairstyling.mov", md.metadata['video'].nil? ? 'empty' : md.metadata['video'].chomp
28
28
  end
29
29
  end
30
+
31
+ # context "Specifying a XML metadata file the .to_file method" do
32
+ # should "read the DC elements and place it in fields"
33
+ # end
34
+
30
35
  end
@@ -1,4 +1,4 @@
1
- # $Id: test_options.rb 192 2010-03-27 01:24:26Z pwilkins $
1
+ # $Id: test_options.rb 196 2010-06-11 18:51:18Z pwilkins $
2
2
  # Copyright (c) 2010 Massachusetts Institute of Technology
3
3
  # see LICENSE.txt for license text
4
4
 
@@ -36,6 +36,13 @@ class TestOptions < Test::Unit::TestCase
36
36
  assert_equal 'wrd', opts.srctype
37
37
  end
38
38
 
39
+ should "read srctype:ttml, desttype:json" do
40
+ opts = SmTranscript::Options.new([
41
+ '--desttype', 'json', '--srctype', 'ttml'])
42
+ assert_equal 'json', opts.desttype
43
+ assert_equal 'ttml', opts.srctype
44
+ end
45
+
39
46
  should "read srctype:wrd, desttype:datajs" do
40
47
  opts = SmTranscript::Options.new([
41
48
  '--desttype', 'datajs', '--srctype', 'wrd'])
@@ -1,4 +1,4 @@
1
- # $Id: test_runner.rb 192 2010-03-27 01:24:26Z pwilkins $
1
+ # $Id: test_runner.rb 196 2010-06-11 18:51:18Z pwilkins $
2
2
  # Copyright (c) 2010 Massachusetts Institute of Technology
3
3
  # see LICENSE.txt for license text
4
4
 
@@ -42,6 +42,39 @@ class TestRunner < Test::Unit::TestCase
42
42
  runner.run
43
43
  end
44
44
 
45
+ fname01 = '801-1stLecture.ttml.xml'
46
+ fname02 = 'Audio-Open-The_New_Deal_for_Education.xml'
47
+
48
+ should "return specified ttml srctype, and default values" do
49
+ runner = SmTranscript::Runner.new(["--srctype", 'xml', '--desttype', 'html'])
50
+ opts = runner.options
51
+ assert File.exists?("#{opts.srcdir}/#{fname01}"),
52
+ "Source file not found: #{opts.srcdir}/#{fname01}"
53
+ assert_equal './transcripts', opts.destdir
54
+ assert_equal './results', opts.srcdir
55
+ assert_equal 'xml', opts.srctype
56
+ assert_equal 'html', opts.desttype
57
+ runner.run
58
+ assert(File.exists?("#{opts.destdir}/#{fname01}-t1.html"),
59
+ "File not found: #{opts.destdir}/#{fname01}-t1.html")
60
+ end
61
+
62
+ fname03 = '8.01-1999-L01.wrd'
63
+
64
+ should "process .wrd file to .ttml, and default values" do
65
+ runner = SmTranscript::Runner.new(["--srctype", 'wrd', '--desttype', 'ttml'])
66
+ opts = runner.options
67
+ assert File.exists?("#{opts.srcdir}/#{fname03}"),
68
+ "Source file not found: #{opts.srcdir}/#{fname03}"
69
+ assert_equal './transcripts', opts.destdir
70
+ assert_equal './results', opts.srcdir
71
+ assert_equal 'wrd', opts.srctype
72
+ assert_equal 'ttml', opts.desttype
73
+ runner.run
74
+ assert(File.exists?("#{opts.destdir}/#{fname03}-t1.ttml"),
75
+ "File not found: #{opts.destdir}/#{fname03}-t1.ttml")
76
+ end
77
+
45
78
  # I don't know how to test for the "invalid option" error that this test causes.
46
79
  # should "return display usage information and optionally an error msg" do
47
80
  # SmTranscript::Runner.new(["--niblick-mashie"])
@@ -1,4 +1,4 @@
1
- # $Id: test_transcript.rb 192 2010-03-27 01:24:26Z pwilkins $
1
+ # $Id: test_transcript.rb 196 2010-06-11 18:51:18Z pwilkins $
2
2
  # Copyright (c) 2010 Massachusetts Institute of Technology
3
3
  # see LICENSE.txt for license text
4
4
 
@@ -9,10 +9,10 @@ require 'shoulda'
9
9
  require_relative '../lib/sm_transcript/transcript'
10
10
  require_relative '../lib/sm_transcript/seg_reader'
11
11
  require_relative '../lib/sm_transcript/wrd_reader'
12
+ require_relative '../lib/sm_transcript/ttml_reader'
12
13
  require_relative '../lib/sm_transcript/word'
13
14
 
14
15
  class TestTranscript < Test::Unit::TestCase
15
- # words = []
16
16
 
17
17
  # context "write transcript to HTML in default dest dir" do
18
18
  # should "create transcript file in ./transcripts" do
@@ -26,37 +26,134 @@ class TestTranscript < Test::Unit::TestCase
26
26
  # end
27
27
  # end
28
28
 
29
- context "write transcript to HTML in default dest dir and call SegReader" do
29
+ context "Call SegReader, write transcript to HTML in default dest dir" do
30
30
  should "create transcript file in ./transcripts" do
31
31
  segfile = SmTranscript::SegReader.from_file("results/IIHS_Diane_Davis_Nov2009.seg")
32
32
  t = SmTranscript::Transcript.new(segfile.words)
33
+ assert_not_nil(t, "unable to create words array from ./results/IIHS_Diane_Davis_Nov2009.seg")
34
+ assert(t.words.length > 1, "@words array contains single phrase")
33
35
  t.write_html("transcripts/IIHS_Diane_Davis_Nov2009-t1.html")
34
36
 
35
37
  # for now just check for the existence of a file
36
- assert(File.exists?("transcripts/IIHS_Diane_Davis_Nov2009-t1.html"))
38
+ assert(File.exists?("transcripts/IIHS_Diane_Davis_Nov2009-t1.html"),
39
+ "File not found: transcripts/IIHS_Diane_Davis_Nov2009-t1.html")
37
40
  end
38
41
  end
39
42
 
40
- context "Calling WrdReader, writing transcript to HTML in default dest dir" do
43
+ context "Call WrdReader, write transcript to HTML in default dest dir" do
44
+
45
+ fname03 = 'vijay_kumar.wrd'
46
+ fext = '-t1.ttml'
47
+
41
48
  should "create transcript file in ./transcripts" do
42
- wrdfile = SmTranscript::WrdReader.from_file("results/vijay_kumar.wrd")
49
+ wrdfile = SmTranscript::WrdReader.from_file("results/#{fname03}")
43
50
  t = SmTranscript::Transcript.new(wrdfile.words)
51
+ assert_not_nil(t, "unable to create words array from ./results/#{fname03}")
52
+ assert(t.words.length > 1, "@words array contains single phrase")
44
53
  t.write_html("transcripts/vijay_kumar-t1.html")
45
54
 
46
55
  # for now just check for the existence of a file
47
- assert(File.exists?("transcripts/vijay_kumar-t1.html"))
56
+ assert(File.exists?("transcripts/vijay_kumar-t1.html"),
57
+ "File not found: transcripts/vijay_kumar-t1.html")
58
+ end
59
+
60
+ should "correct known phrases" do
61
+ # <span id='T9'>of MIT</span>
62
+ str_found = false
63
+ File.open("transcripts/vijay_kumar-t1.html").each do |ln|
64
+ if !ln.index("T9'\>of MIT").nil?
65
+ str_found = true
66
+ end
67
+ end
68
+ # assert(str_found)
69
+ end
70
+
71
+ should "create ttml transcript file for #{fname03}" do
72
+ wrdfile = SmTranscript::WrdReader.from_file("results/#{fname03}")
73
+ t = SmTranscript::Transcript.new(wrdfile.words)
74
+ assert_not_nil(t, "unable to create words array from ./results/#{fname03}")
75
+ assert(t.words.length > 1, "@words array contains single phrase")
76
+ t.write_ttml("transcripts/#{fname03}#{fext}")
77
+
78
+ # for now just check for the existence of a file
79
+ assert(File.exists?("transcripts/#{fname03}#{fext}"),
80
+ "File not found: transcripts/#{fname03}#{fext}")
81
+ end
82
+
83
+ fname04 = '8.01-1999-L01.wrd'
84
+ should "create tt transcript file for #{fname04}" do
85
+ wrdfile = SmTranscript::WrdReader.from_file("results/#{fname04}")
86
+ t = SmTranscript::Transcript.new(wrdfile.words)
87
+ assert_not_nil(t, "unable to create words array from ./results/#{fname04}")
88
+ assert(t.words.length > 1, "@words array contains single phrase")
89
+ t.write_ttml("transcripts/#{fname04}#{fext}")
90
+
91
+ # for now just check for the existence of a file
92
+ assert(File.exists?("transcripts/#{fname04}#{fext}"),
93
+ "File not found: transcripts/#{fname04}#{fext}")
48
94
  end
49
95
  end
50
96
 
51
- context "Calling WrdReader, writing transcript to timedText in default dest dir" do
52
- should "create tt transcript file in ./transcripts" do
53
- wrdfile = SmTranscript::WrdReader.from_file("results/vijay_kumar.wrd")
97
+ context "Calling TtmlReader, writing transcript to HTML in default dest dir" do
98
+
99
+ fname01 = '801-1stLecture.ttml.xml'
100
+ fname02 = 'Audio-Open-The_New_Deal_for_Education.xml'
101
+ fext = '-t1.html'
102
+
103
+ should "create #{fname01}-t1.html file in ./transcripts" do
104
+ wrdfile = SmTranscript::TtmlReader.from_file("results/#{fname01}")
105
+ t = SmTranscript::Transcript.new(wrdfile.words)
106
+ assert_not_nil(t, "unable to create words array from ./results/#{fname01}")
107
+ assert(t.words.length > 1, "@words array contains single phrase")
108
+ t.write_html("transcripts/#{fname01}#{fext}")
109
+
110
+ # for now just check for the existence of a file
111
+ assert(File.exists?("transcripts/#{fname01}#{fext}"),
112
+ "File not found: transcripts/#{fname01}#{fext}")
113
+ end
114
+
115
+ should "create #{fname02}-t1.html file in ./transcripts" do
116
+ wrdfile = SmTranscript::TtmlReader.from_file("results/#{fname02}")
54
117
  t = SmTranscript::Transcript.new(wrdfile.words)
55
- t.write_ttml("transcripts/vijay_kumar-t1.ttml")
118
+ assert_not_nil(t, "unable to create words array from ./results/#{fname02}")
119
+ assert(t.words.length > 1, "@words array contains single phrase")
120
+ t.write_html("transcripts/#{fname02}#{fext}")
56
121
 
57
122
  # for now just check for the existence of a file
58
- assert(File.exists?("transcripts/vijay_kumar-t1.ttml"))
123
+ assert(File.exists?("transcripts/#{fname02}#{fext}"),
124
+ "File not found: transcripts/#{fname02}#{fext}")
59
125
  end
60
126
  end
61
127
 
128
+
129
+ context "Call TtmlReader, write transcript to TTML in default dest dir" do
130
+
131
+ fname01 = '801-1stLecture.ttml.xml'
132
+ fname02 = '801-lect02-4731.xml'
133
+ fext = '-t1.ttml'
134
+
135
+ should "create #{fname01}#{fext} file in ./transcripts" do
136
+ wrdfile = SmTranscript::TtmlReader.from_file("results/#{fname01}")
137
+ t = SmTranscript::Transcript.new(wrdfile.words)
138
+ assert_not_nil(t, "unable to create words array from ./results/#{fname01}")
139
+ assert(t.words.length > 1, "@words array contains single phrase")
140
+ t.write_ttml("transcripts/#{fname01}#{fext}")
141
+
142
+ # for now just check for the existence of a file
143
+ assert(File.exists?("transcripts/#{fname01}#{fext}"),
144
+ "File not found: transcripts/#{fname01}#{fext}")
145
+ end
146
+
147
+ should "create #{fname02}-t1.html file in ./transcripts" do
148
+ wrdfile = SmTranscript::TtmlReader.from_file("results/#{fname02}")
149
+ t = SmTranscript::Transcript.new(wrdfile.words)
150
+ assert_not_nil(t, "unable to create words array from ./results/#{fname02}")
151
+ assert(t.words.length > 1, "@words array contains single phrase")
152
+ t.write_ttml("transcripts/#{fname02}#{fext}")
153
+
154
+ # for now just check for the existence of a file
155
+ assert(File.exists?("transcripts/#{fname02}#{fext}"),
156
+ "File not found: transcripts/#{fname02}#{fext}")
157
+ end
158
+ end
62
159
  end
@@ -0,0 +1,104 @@
1
+ # $Id $
2
+ # Copyright (c) 2010 Massachusetts Institute of Technology
3
+ # see LICENSE.txt for license text
4
+
5
+ require 'rubygems'
6
+ require 'extensions/kernel'
7
+ require 'test/unit'
8
+ require 'shoulda'
9
+ require_relative '../lib/sm_transcript/ttml_reader'
10
+
11
+ class TestTtmlReader < Test::Unit::TestCase
12
+ fname01 = '801-1stLecture.ttml.xml'
13
+ fname02 = 'Audio-Open-The_New_Deal_for_Education.xml'
14
+ fname03 = '801-lect01-4730.xml'
15
+ fname04 = '801-lect02-4731.xml'
16
+
17
+ context "Specifying a ttml source file" do
18
+ should "verify that the file can be opened" do
19
+ f = SmTranscript::TtmlReader.from_file("results/#{fname01}")
20
+ assert_not_nil(f)
21
+ end
22
+ # should "report if the file can't be found" do
23
+ # wrdfile = SmTranscript::TtmlReader.from_file("results/this_file_doesnt_exist")
24
+ # assert_not_nil(wrdfile)
25
+ # end
26
+ end
27
+
28
+ context "processing a TTML source file" do
29
+ should "create an Array of Word objects from #{fname01}" do
30
+ f = SmTranscript::TtmlReader.from_file("results/#{fname01}")
31
+
32
+ assert_not_nil(f, "unable to create words array from ./results/#{fname01}")
33
+ assert_equal 766, f.words[0].start_time
34
+ assert_equal 2033, f.words[0].end_time
35
+ assert_equal 1267, f.words[0].duration
36
+ assert_equal "I'm Walter Lewin.", f.words[0].word
37
+ end
38
+
39
+ should "create an Array of Word objects from #{fname02}" do
40
+ f = SmTranscript::TtmlReader.from_file("results/#{fname02}")
41
+
42
+ assert_not_nil(f, "unable to create words array from ./results/#{fname02}")
43
+ last_index = f.words.length - 1
44
+ assert_equal 6580, f.words[0].start_time
45
+ assert_equal 8950, f.words[0].end_time
46
+ assert_equal 2370, f.words[0].duration
47
+ assert_equal "Let me welcome all of you on", f.words[0].word
48
+
49
+ assert_equal 8950, f.words[1].start_time
50
+ assert_equal 11320, f.words[1].end_time
51
+ assert_equal 2370, f.words[1].duration
52
+ assert_equal " behalf of MIT, on behalf of the", f.words[1].word
53
+
54
+ assert_equal 3027795, f.words[last_index].start_time
55
+ assert_equal 3033750, f.words[last_index].end_time
56
+ assert_equal 5955, f.words[last_index].duration
57
+ assert_equal " noticed, but thanks a lot.", f.words[last_index].word
58
+ end
59
+
60
+ should "create an Array of Word objects from #{fname03}" do
61
+ f = SmTranscript::TtmlReader.from_file("results/#{fname03}")
62
+
63
+ assert_not_nil(f, "unable to create words array from ./results/#{fname03}")
64
+ last_index = f.words.length - 1
65
+ assert( (last_index + 1) == 1188, "Unexpected number of word entries")
66
+ assert_equal 766, f.words[0].start_time
67
+ assert_equal 2033, f.words[0].end_time
68
+ assert_equal 1267, f.words[0].duration
69
+ assert_equal "I'm Walter Lewin.", f.words[0].word
70
+
71
+ assert_equal 2033, f.words[1].start_time
72
+ assert_equal 3399, f.words[1].end_time
73
+ assert_equal 1366, f.words[1].duration
74
+ assert_equal "I will be your lecturer", f.words[1].word
75
+
76
+ assert_equal 2286666, f.words[last_index].start_time
77
+ assert_equal 2288266, f.words[last_index].end_time
78
+ assert_equal 1600, f.words[last_index].duration
79
+ assert_equal "See you Friday.", f.words[last_index].word
80
+ end
81
+
82
+ should "create an Array of Word objects from #{fname04}" do
83
+ f = SmTranscript::TtmlReader.from_file("results/#{fname04}")
84
+
85
+ assert_not_nil(f, "unable to create words array from ./results/#{fname04}")
86
+ last_index = f.words.length - 1
87
+ assert_equal 1033, f.words[0].start_time
88
+ assert_equal 2516, f.words[0].end_time
89
+ assert_equal 1483, f.words[0].duration
90
+ assert_equal "We will discuss velocities", f.words[0].word
91
+
92
+ assert_equal 2516, f.words[1].start_time
93
+ assert_equal 3999, f.words[1].end_time
94
+ assert_equal 1483, f.words[1].duration
95
+ assert_equal "and acceleration.", f.words[1].word
96
+
97
+ assert_equal 3061566, f.words[last_index].start_time
98
+ assert_equal 3062566, f.words[last_index].end_time
99
+ assert_equal 1000, f.words[last_index].duration
100
+ assert_equal "See you Monday.", f.words[last_index].word
101
+ end
102
+ end
103
+
104
+ end
@@ -1,4 +1,4 @@
1
- # $Id: test_wrdreader.rb 195 2010-04-15 17:29:55Z pwilkins $
1
+ # $Id: test_wrdreader.rb 196 2010-06-11 18:51:18Z pwilkins $
2
2
  # Copyright (c) 2010 Massachusetts Institute of Technology
3
3
  # see LICENSE.txt for license text
4
4
 
@@ -22,24 +22,39 @@ class TestWrdReader < Test::Unit::TestCase
22
22
  end
23
23
 
24
24
  context "processing a WRD source file" do
25
- should "create an Array of Word objects from vijay_kumar.wrd (lf line endings)" do
25
+ should "create Array of Word objects from vijay_kumar.wrd (lf line endings)" do
26
26
  wrdfile = SmTranscript::WrdReader.from_file("results/vijay_kumar.wrd")
27
27
 
28
- assert_not_nil(wrdfile)
28
+ assert_not_nil(wrdfile, "unable to create words array from ./results/vijay_kumar.wrd")
29
29
  assert_equal "5660", wrdfile.words[0].start_time
30
30
  assert_equal "6627", wrdfile.words[0].end_time
31
31
  assert_equal "okay", wrdfile.words[0].word
32
32
  end
33
- end
34
33
 
35
- context "processing a WRD source file" do
36
- should "create an Array of Word objects from GardnerRileyInterview.wrd (crlf line endings)" do
34
+ should "create Array of Word objects from GardnerRileyInterview.wrd (crlf line endings)" do
37
35
  wrdfile = SmTranscript::WrdReader.from_file("results/GardnerRileyInterview.wrd")
38
-
39
- assert_not_nil(wrdfile)
40
- assert_equal "1630", wrdfile.words[0].start_time
36
+
37
+ assert_not_nil(wrdfile, "unable to create words array from ./results/GardnerRileyInterview.wrd")
38
+ assert_equal "1630", wrdfile.words[0].start_time # first word
41
39
  assert_equal "1815", wrdfile.words[0].end_time
42
40
  assert_equal "this", wrdfile.words[0].word
41
+ assert_equal "321696", wrdfile.words[539].start_time # last word
42
+ assert_equal "321785", wrdfile.words[539].end_time
43
+ assert_equal "you", wrdfile.words[539].word
44
+ assert_equal 540, wrdfile.words.length
45
+ end
46
+
47
+ should "create Array of Word objects from 8.01-1999-L01.wrd (lf line endings)" do
48
+ wrdfile = SmTranscript::WrdReader.from_file("results/8.01-1999-L01.wrd")
49
+
50
+ assert_not_nil(wrdfile, "unable to create words array from ./results/8.01-1999-L01.wrd")
51
+ assert_equal "2010", wrdfile.words[0].start_time # first word
52
+ assert_equal "2125", wrdfile.words[0].end_time
53
+ assert_equal "I'm", wrdfile.words[0].word
54
+ assert_equal "2288600", wrdfile.words[5181].start_time # last word
55
+ assert_equal "2290545", wrdfile.words[5181].end_time
56
+ assert_equal "<noise>", wrdfile.words[5181].word
57
+ assert_equal 5182, wrdfile.words.length
43
58
  end
44
59
  end
45
60
 
metadata CHANGED
@@ -1,12 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sm-transcript
3
3
  version: !ruby/object:Gem::Version
4
+ hash: 19
4
5
  prerelease: false
5
6
  segments:
6
7
  - 0
7
8
  - 0
8
- - 4
9
- version: 0.0.4
9
+ - 6
10
+ version: 0.0.6
10
11
  platform: ruby
11
12
  authors:
12
13
  - Peter Wilkins
@@ -14,133 +15,28 @@ autorequire:
14
15
  bindir: bin
15
16
  cert_chain: []
16
17
 
17
- date: 2010-04-15 00:00:00 -04:00
18
+ date: 2010-07-19 00:00:00 -04:00
18
19
  default_executable:
19
20
  dependencies: []
20
21
 
21
- description: "$Id: README.txt 194 2010-03-28 00:09:23Z pwilkins $\n\n\
22
+ description: "$Id: README.txt 196 2010-06-11 18:51:18Z pwilkins $\n\n\
22
23
  sm-transcript reads results of SLS processing and produces transcripts for\n\
23
24
  the SpokenMedia browser. For each file in the source folder whose extension \n\
24
25
  matches the source type, a file of destination type is created in the \n\
25
- destination folder. All of these parameters have default values. \n\n\
26
- Requirements:\n\
27
- \tsm-transcript is written in Ruby and packaged as a RubyGem. Since Ruby is\n\
28
- \tnot a compiled language, you will need to have Ruby installed on your \n\
29
- \tmachine to run sm-transcript. You can determine if Ruby is installed by \n\
30
- \ttyping \"ruby -v\" at a terminal prompt. It should return the version of \n\
31
- \tRuby that is installed. If Ruby is not installed on your machine, contact\n\
32
- \tme (or your local Ruby wizard) for assistance.\n\
33
- \t\n\
34
- Installation:\n\
35
- \tYou can get sm-transcript as either a RubyGem or as source from svn.\n\
36
- \t\n\
37
- \tThe preferred way to install this package is as a Rubygem. You can \n\
38
- \tdownload and install the gem with this command: \n\
39
- \t\n\
40
- \tsudo gem install [--verbose] sm-transcript\n\
41
- \t\n\
42
- \tThis command downloads the most recent version of the gem from rubygems.org\n\
43
- \tand makes it active. Previous versions of the gem remain installed, but \n\
44
- \tare deactivated.\n\
45
- \t\n\
46
- \tYou must use \"sudo\" to properly install the gem. If you execute \"gem \n\
47
- \tinstall\" (omitting the \"sudo\") the gem is installed in your home gem \n\
48
- \trepository and it isn't in your path without additional configuration.\n\
49
- \t\n\
50
- \tNote: You need sudo privileges to run the command as written. If you \n\
51
- \tcan't sudo, then you can install it locally and will need some additional\n\
52
- \tconfiguration. Contact me (or your local Ruby wizard) for assistance. \n\
53
- \t \n\
54
- \tThe executable is now in your path.\n\
55
- \t\n\
56
- \tYou can cleanly uninstall the gem with this command:\n\
57
- \t\n\
58
- \tsudo gem uninstall sm-transcript\t\n\
59
- \t\n\
60
- \tIf you have access to our svn repository, you are welcome to check out the \n\
61
- \tcode. Be warned that the trunk tip is not necessarily stable. It changes \n\
62
- \tfrequently as enhancements (and bug fixes) are added. (note that the\n\
63
- \t'smb_transcript' in the command line below is not a typo. )\n\n\
64
- \tsvn co svn+ssh://svn.mit.edu/oeit-tsa/SMB/smb_transcript/trunk sm_transcript\n\
65
- \t\n\
66
- \tbuild the gem by running this command from the directory you installed the \n\
67
- \tsource.\n\
68
- \t\n\
69
- \trake gem\n\
70
- \t\n\
71
- \tThe gem will be built and put in ./pkg You can now use the gem \n\
72
- \tinstallation instructions above.\n\
73
- \t\n\n\
74
- Using the App:\n\
75
- \tRun with no command line parameters, the app reads *.wrd files out of \n\
76
- \t./results and writes *t1.html files to ./transcripts. These directories\n\
77
- \tare relative to where sm_transcript is called.\n\
78
- \t\n\
79
- \tNote: destination files are overwritten without a warning prompt. If you \n\
80
- \twant to preserve an existing output file, rename it before running the app\n\
81
- \tagain.\n\
82
- \t\n\
83
- \tFor example, run the app by navigating to the bin folder and running \n\n\
84
- \t\tprojects/sm_transcript/bin felix$ sm_transcript\n\
85
- \t\n\
86
- \tThis command run from this folder will read *.wrd files from bin/results\n\
87
- \tand write *-t1.html to bin/transcripts.\n\
88
- \t\n\
89
- \tUsage: sm_transcript [options] \n --srcdir PATH Read files from this folder (Default: ./results)\n --destdir PATH Write files to this folder (Default: ./transcripts)\n --srctype wrd | seg Kind of file to process (Default: wrd)\n --desttype html | ttml | datajs Kind of file to output (Default: html)\n -h, --help Show this message\t\n\n\n\
90
- Troubleshooting:\n\
91
- \tsm-transcript requires additional gems to operate. The RubyGem \n\
92
- \tinstallation should install dependencies automatically, but when it \n\
93
- \tdoesn't, you get an error that includes \n\
94
- \t\n\
95
- \t... no such file to load -- builder (LoadError)\n\
96
- \t\n\
97
- \tin the first few lines when you run sm-transcript, the problem is a \n\
98
- \tmissing dependent gem. (the error above indicates that the Builder \n\
99
- \tgem is missing.) Try installing the missing gem. For the error above,\n\
100
- \tcommand looks like this:\n\
101
- \t\n\
102
- \tsudo gem install builder\n\
103
- \t\n\
104
- \tSee \"Required Gems\" below for more information.\n\
105
- \t\n\
106
- \t\t\n\
107
- Upgrading:\n\
108
- \tYou can easily upgrade by simply executing the same command you used to \n\
109
- \tinstall the gem. Running install again will add the newer version and make\n\
110
- \tit active. By default the most recent version is used, but older versions\n\
111
- \tare still available, simply inactive.\n\
112
- \t\n\
113
- \tIf are using svn, you should already know what to do.\n\
114
- \t\n\
115
- \t\n\
116
- Required Gems:\n\
117
- \tbuilder - create structured data, such as XML\n\
118
- \textensions - added for the 'require_relative' command. (To get this\n\
119
- \t command in Ruby 1.8 you need to install this gem, for Ruby 1.9\n\
120
- \t the command is already part of the core.)\n\
121
- \thtmlentities - html parsing\n\
122
- \tjson - create JSON structured data\n\
123
- \toptparse - option parsing of command line\n\
124
- \tostruct - open data structures\n\
125
- \tppcommand - pp is a pretty printer. It is used only for debugging\n\
126
- \trake - make for Ruby\n\
127
- \trubygems - support for gems (shouldn't be needed for Ruby 1.9)\n\
128
- \tshoulda - enhancement for Test::Unit\n\
129
- \t\t\n\
130
- \tThis command installs gems on OSX and Linux:\n\
131
- \tfelix$ sudo gem install <gem name>\n\
132
- \t\n\
133
- Unit Tests:\n\
134
- \tYou may run all unit tests by navigating to the test folder and running \n\
135
- \trake with no parameters (the default rake task runs all tests):\n\n\
136
- \tprojects/sm_transcript/test felix$ rake \n\n\n\
137
- Release Notes:\n\
138
- \tInitial Version - runs under Ruby 1.8. \n\n\
139
- To Do:\n\
140
- \tupdate code to run under Ruby 1.9\n\n\
141
- \tMake this a rubygem, making it available from an OEIT server, rather than\n\
142
- \tfrom a public gem repository like RubyForge.\n\
143
- \t"
26
+ destination folder. All of these parameters have default values.\n\n\
27
+ Note: Examples of the commands you enter in the terminal are for *nix. The\n\
28
+ command prompt in the examples is:\n\n\
29
+ felix$ <command line>\n\n\
30
+ If you are a Windows user, make the usual adjustments.\n\n\
31
+ Requirements:\n sm-transcript is written in Ruby and packaged as a RubyGem. Since Ruby is\n not a compiled language, you will need to have Ruby installed on your \n machine to run sm-transcript. You can determine if Ruby is installed by \n typing \"ruby -v\" at a terminal prompt. It should return the version of \n Ruby that is installed. If Ruby is not installed on your machine, contact\n me (or your local Ruby wizard) for assistance.\n \n\
32
+ Installation:\n You can get sm-transcript as either a RubyGem or as source from svn.\n \n The preferred way to install this package is as a Rubygem. You can \n download and install the gem with this command: \n \n felix$ sudo gem install [--verbose] sm-transcript\n \n This command downloads the most recent version of the gem from rubygems.org\n and makes it active. Previous versions of the gem remain installed, but \n are deactivated.\n \n You must use \"sudo\" to properly install the gem. If you execute \"gem \n install\" (omitting the \"sudo\") the gem is installed in your home gem \n repository and it isn't in your path without additional configuration.\n \n Note: You need sudo privileges to run the command as written. If you \n can't sudo, then you can install it locally and will need some additional\n configuration. Contact me (or your local Ruby wizard) for assistance. \n \n The executable is now in your path.\n \n You can cleanly uninstall the gem with this command:\n \n felix$ sudo gem uninstall sm-transcript \n \n If you have access to our svn repository, you are welcome to check out the \n code. Be warned that the trunk tip is not necessarily stable. It changes \n frequently as enhancements (and bug fixes) are added. (note that the\n 'smb_transcript' in the command line below is not a typo.)\n\n svn co svn+ssh://svn.mit.edu/oeit-tsa/SMB/smb_transcript/trunk sm_transcript\n \n build the gem by running this command from the directory you installed the \n source. This is what it looks like on my machine:\n \n felix$ rake gem\n \n The gem will be built and put in ./pkg You can now use the gem \n installation instructions above.\n \n\n\
33
+ Using the App:\n Run with no command line parameters, the app reads *.wrd files out of \n ./results and writes *t1.html files to ./transcripts. These directories\n are relative to where sm_transcript is called.\n \n Note: destination files are overwritten without a warning prompt. If you \n want to preserve an existing output file, rename it before running the app\n again.\n \n For example, run the app by navigating to the bin folder and enter \n\n projects/sm_transcript/bin felix$ sm_transcript\n \n This command run from this folder will read *.wrd files from bin/results\n and write *-t1.html to bin/transcripts.\n \n Usage: sm_transcript [options] \n --srcdir PATH Read files from this folder (Default: ./results)\n --destdir PATH Write files to this folder (Default: ./transcripts)\n --srctype wrd | seg | txt | ttml Kind of file to process (Default: wrd)\n --desttype html | ttml | datajs | json Kind of file to output (Default: html)\n -h, --help Show this message \n\n\n\
34
+ Troubleshooting:\n sm-transcript requires additional gems to operate. The RubyGem \n installation should install dependencies automatically, but when it \n doesn't, you get an error that includes \n \n ... no such file to load -- builder (LoadError)\n \n in the first few lines when you run sm-transcript, the problem is a \n missing dependent gem. (the error above indicates that the Builder \n gem is missing.) Try installing the missing gem. For the error above,\n the command looks like this on my computer:\n \n felix$ sudo gem install builder\n \n See \"Required Gems\" below for more information.\n \n \n A warning message such as:\n \n \"WARNING: Nokogiri was built against LibXML version 2.7.6, \n but has dynamically loaded 2.7.7\"\n \n may be safely ignored.\n \n \n\
35
+ Upgrading:\n You can easily upgrade by simply executing the same command you used to \n install the gem. Running install again will add the newer version and make\n it active. By default the most recent version is used, but older versions\n are still available, simply inactive.\n \n If you are using svn, you should already know what to do.\n \n \n\
36
+ Required Gems:\n builder - create structured data, such as XML\n extensions - added for the 'require_relative' command. (To get this\n command in Ruby 1.8 you need to install this gem, for Ruby 1.9\n the command is already part of the core.)\n htmlentities - html parsing\n json - create JSON structured data\n optparse - option parsing of command line\n ostruct - open data structures\n ppcommand - pp is a pretty printer. It is used only for debugging\n rake - make for Ruby\n rubygems - support for gems (shouldn't be needed for Ruby 1.9)\n shoulda - enhancement for Test::Unit\n \n This command installs gems on OSX and Linux:\n felix$ sudo gem install <gem name>\n \n\
37
+ Unit Tests:\n You may run all unit tests by navigating to the test folder and running \n rake with no parameters (the default rake task runs all tests). On my\n computer, it looks like this:\n\n projects/sm_transcript/test felix$ rake \n\n\n\
38
+ Release Notes:\n Initial Version - runs under Ruby 1.8.x. \n version 0.0.4 - fixes bug when processing .WRD files with CRLF line\n endings.\n version 0.0.5 - added srctype of ttml and desttype of json, fixed bug\n where beginning time of word was actually for previous word.\n\n\
39
+ To Do:\n specify individual files for processing rather than folders\n update code to run under Ruby 1.9\n\n\n "
144
40
  email: pwilkins@mit.edu
145
41
  executables:
146
42
  - sm-transcript
@@ -153,47 +49,45 @@ files:
153
49
  - lib/sm_transcript/metadata.rb
154
50
  - lib/sm_transcript/metadata_reader.rb
155
51
  - lib/sm_transcript/options.rb
156
- - lib/sm_transcript/optparseExample.rb
157
- - lib/sm_transcript/process_csv_files_to_html.rb
158
- - lib/sm_transcript/process_seg_files.rb
159
- - lib/sm_transcript/process_seg_files_to_csv.rb
160
- - lib/sm_transcript/process_seg_files_to_html.rb
161
- - lib/sm_transcript/require_relative.rb
162
52
  - lib/sm_transcript/runner.rb
163
53
  - lib/sm_transcript/seg_reader.rb
164
54
  - lib/sm_transcript/transcript.rb
55
+ - lib/sm_transcript/ttml_reader.rb
165
56
  - lib/sm_transcript/word.rb
166
57
  - lib/sm_transcript/wrd_reader.rb
167
58
  - bin/sm-transcript
168
59
  - bin/results/PLACEHOLDER.txt
169
60
  - bin/transcripts/PLACEHOLDER.txt
170
61
  - test/Rakefile
171
- - test/results/GardnerRileyInterview.wrd
172
- - test/results/IIHS_Diane_Davis_Nov2009.seg
173
- - test/results/NERCOMP-SpokenMedia4.wrd
174
- - test/results/PLACEHOLDER.txt
175
- - test/results/PLACEHOLDER.txt.ignore
176
- - test/results/vijay_kumar.wrd
177
- - test/results/wirehair-beetle.txt
178
62
  - test/test_metadata.rb
179
63
  - test/test_metadatareader.rb
180
64
  - test/test_options.rb
181
65
  - test/test_runner.rb
182
66
  - test/test_segreader.rb
183
67
  - test/test_transcript.rb
68
+ - test/test_ttmlreader.rb
184
69
  - test/test_wrdreader.rb
185
- - test/transcripts/data.js
186
- - test/transcripts/GardnerRileyInterview.t1.html
187
- - test/transcripts/IIHS_Diane_Davis_Nov2009-t1.html
188
- - test/transcripts/NERCOMP-SpokenMedia4.t1.html
70
+ - test/results/18.03-2004-L01.align2.wrd
71
+ - test/results/8.01-1999-L01.wrd
72
+ - test/results/801-1stLecture.ttml.xml
73
+ - test/results/801-lect01-4730.xml
74
+ - test/results/801-lect02-4731.xml
75
+ - test/results/801-lect03-4732.xml
76
+ - test/results/801-lect04-4733.xml
77
+ - test/results/801-lect05-4734.xml
78
+ - test/results/801-lect06-4735.xml
79
+ - test/results/801-lect07-4736.xml
80
+ - test/results/801-lect08-4737.xml
81
+ - test/results/801-lect09-4738.xml
82
+ - test/results/Audio-Open-The_New_Deal_for_Education.xml
83
+ - test/results/GardnerRileyInterview.wrd
84
+ - test/results/IIHS_Diane_Davis_Nov2009.seg
85
+ - test/results/NERCOMP-SpokenMedia4.wrd
86
+ - test/results/PLACEHOLDER.txt
87
+ - test/results/PLACEHOLDER.txt.ignore
88
+ - test/results/vijay_kumar.wrd
89
+ - test/results/wirehair-beetle.txt
189
90
  - test/transcripts/PLACEHOLDER.txt
190
- - test/transcripts/vijay_kumar-1.-t1.html
191
- - test/transcripts/vijay_kumar-1.t1.html
192
- - test/transcripts/vijay_kumar-t1.html
193
- - test/transcripts/vijay_kumar-t1.ttml
194
- - test/transcripts/vijay_kumar.data.js
195
- - test/transcripts/vijay_kumar.t1.html
196
- - test/transcripts/wirehair-beetle.data.js
197
91
  - README.txt
198
92
  - LICENSE.txt
199
93
  - Rakefile
@@ -207,24 +101,28 @@ rdoc_options: []
207
101
  require_paths:
208
102
  - lib
209
103
  required_ruby_version: !ruby/object:Gem::Requirement
104
+ none: false
210
105
  requirements:
211
106
  - - ">="
212
107
  - !ruby/object:Gem::Version
108
+ hash: 31
213
109
  segments:
214
110
  - 1
215
111
  - 8
216
112
  version: "1.8"
217
113
  required_rubygems_version: !ruby/object:Gem::Requirement
114
+ none: false
218
115
  requirements:
219
116
  - - ">="
220
117
  - !ruby/object:Gem::Version
118
+ hash: 3
221
119
  segments:
222
120
  - 0
223
121
  version: "0"
224
122
  requirements:
225
123
  - TBD
226
124
  rubyforge_project:
227
- rubygems_version: 1.3.6
125
+ rubygems_version: 1.3.7
228
126
  signing_key:
229
127
  specification_version: 3
230
128
  summary: Convert word lists to transcripts
@@ -235,4 +133,5 @@ test_files:
235
133
  - test/test_runner.rb
236
134
  - test/test_segreader.rb
237
135
  - test/test_transcript.rb
136
+ - test/test_ttmlreader.rb
238
137
  - test/test_wrdreader.rb