sm-transcript 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
@@ -1,4 +1,4 @@
1
- # $Id: test_runner.rb 196 2010-06-11 18:51:18Z pwilkins $
1
+ # $Id: test_runner.rb 202 2010-10-30 02:47:21Z pwilkins $
2
2
  # Copyright (c) 2010 Massachusetts Institute of Technology
3
3
  # see LICENSE.txt for license text
4
4
 
@@ -75,6 +75,34 @@ class TestRunner < Test::Unit::TestCase
75
75
  "File not found: #{opts.destdir}/#{fname03}-t1.ttml")
76
76
  end
77
77
 
78
+ fname04 = 'ocw-18_02-f07-lec01_220k.srt'
79
+ fname05 = '3.091-04.srt'
80
+ fname06 = '5.60-01.SRT'
81
+
82
+ should "process .srt file to .html, and default values" do
83
+ runner = SmTranscript::Runner.new(["--srctype", 'srt', '--desttype', 'html'])
84
+ opts = runner.options
85
+ assert File.exists?("#{opts.srcdir}/#{fname04}"),
86
+ "Source file not found: #{opts.srcdir}/#{fname04}"
87
+ assert File.exists?("#{opts.srcdir}/#{fname05}"),
88
+ "Source file not found: #{opts.srcdir}/#{fname05}"
89
+ assert File.exists?("#{opts.srcdir}/#{fname06}"),
90
+ "Source file not found: #{opts.srcdir}/#{fname06}"
91
+
92
+ assert_equal './transcripts', opts.destdir
93
+ assert_equal './results', opts.srcdir
94
+ assert_equal 'srt', opts.srctype
95
+ assert_equal 'html', opts.desttype
96
+ runner.run
97
+
98
+ assert(File.exists?("#{opts.destdir}/#{fname04}-t1.ttml"),
99
+ "File not found: #{opts.destdir}/#{fname04}-t1.ttml")
100
+ assert(File.exists?("#{opts.destdir}/#{fname05}-t1.ttml"),
101
+ "File not found: #{opts.destdir}/#{fname05}-t1.ttml")
102
+ assert(File.exists?("#{opts.destdir}/#{fname06}-t1.ttml"),
103
+ "File not found: #{opts.destdir}/#{fname06}-t1.ttml")
104
+ end
105
+
78
106
  # I don't know how to test for the "invalid option" error that this test causes.
79
107
  # should "return display usage information and optionally an error msg" do
80
108
  # SmTranscript::Runner.new(["--niblick-mashie"])
@@ -1,4 +1,4 @@
1
- # $Id: test_segreader.rb 192 2010-03-27 01:24:26Z pwilkins $
1
+ # $Id: test_srtreader.rb 192 2010-03-27 01:24:26Z pwilkins $
2
2
  # Copyright (c) 2010 Massachusetts Institute of Technology
3
3
  # see LICENSE.txt for license text
4
4
 
@@ -6,20 +6,20 @@ require 'rubygems'
6
6
  require 'extensions/kernel'
7
7
  require 'test/unit'
8
8
  require 'shoulda'
9
- require_relative '../lib/sm_transcript/seg_reader'
9
+ require_relative '../lib/sm_transcript/srt_reader'
10
10
 
11
- class TestSegReader < Test::Unit::TestCase
11
+ class TestSrtReader < Test::Unit::TestCase
12
12
 
13
13
  context "app can find the seg file" do
14
14
  should "verify that instance is not nil" do
15
- segfile = SmTranscript::SegReader.from_file("results/IIHS_Diane_Davis_Nov2009.seg")
15
+ segfile = SmTranscript::SrtReader.from_file("results/IIHS_Diane_Davis_Nov2009.seg")
16
16
  assert_not_nil(segfile)
17
17
  end
18
18
  end
19
19
 
20
20
  context "read a metadata item from seg file" do
21
21
  should "return seg file name" do
22
- segfile = SmTranscript::SegReader.from_file("results/IIHS_Diane_Davis_Nov2009.seg")
22
+ segfile = SmTranscript::SrtReader.from_file("results/IIHS_Diane_Davis_Nov2009.seg")
23
23
 
24
24
  assert_equal "IIHS_Diane_Davis_Nov2009.seg",
25
25
  segfile.metadata["orig_seg_path"].to_s
@@ -28,7 +28,7 @@ class TestSegReader < Test::Unit::TestCase
28
28
 
29
29
  context "read a time-coded word from seg file" do
30
30
  should "return first time-coded word in transcript" do
31
- segfile = SmTranscript::SegReader.from_file("results/IIHS_Diane_Davis_Nov2009.seg")
31
+ segfile = SmTranscript::SrtReader.from_file("results/IIHS_Diane_Davis_Nov2009.seg")
32
32
 
33
33
  assert_equal "11406", segfile.words[0].start_time
34
34
  assert_equal "11500", segfile.words[0].end_time
@@ -0,0 +1,141 @@
1
+ # $Id: test_srtreader.rb 203 2010-10-30 09:45:38Z pwilkins $
2
+ # Copyright (c) 2010 Massachusetts Institute of Technology
3
+ # see LICENSE.txt for license text
4
+
5
+ require 'rubygems'
6
+ require 'extensions/kernel'
7
+ require 'test/unit'
8
+ require 'shoulda'
9
+ require_relative '../lib/sm_transcript/srt_reader'
10
+
11
+ class TestSrtReader < Test::Unit::TestCase
12
+
13
+ context "app can find the srt file" do
14
+ should "verify that instance is not nil" do
15
+ srtfile01 = SmTranscript::SrtReader.from_file("results/ocw-18_02-f07-lec01_220k.srt")
16
+ assert_not_nil(srtfile01)
17
+ end
18
+ end
19
+
20
+ context "convert a rfc2284 time code" do
21
+ should "convert time code to seconds as an integer" do
22
+ srtfile01 = SmTranscript::SrtReader.from_file("results/ocw-18_02-f07-lec01_220k.srt")
23
+ assert_equal 0, srtfile01.get_millisecs("00:00,0")
24
+ assert_equal 0, srtfile01.get_millisecs("00:00,00")
25
+ assert_equal 0, srtfile01.get_millisecs("00:00,000")
26
+ assert_equal 1000, srtfile01.get_millisecs("00:01,0")
27
+ assert_equal 1000, srtfile01.get_millisecs("00:01,00")
28
+ assert_equal 1000, srtfile01.get_millisecs("00:01,000")
29
+ assert_equal 0, srtfile01.get_millisecs("00:00:00")
30
+ assert_equal 0, srtfile01.get_millisecs("00:00:00,0")
31
+ assert_equal 0, srtfile01.get_millisecs("00:00:00,00")
32
+ assert_equal 0, srtfile01.get_millisecs("00:00:00,000")
33
+ assert_equal 1000, srtfile01.get_millisecs("00:00:01")
34
+ assert_equal 1000, srtfile01.get_millisecs("00:00:01,0")
35
+ assert_equal 1000, srtfile01.get_millisecs("00:00:01,00")
36
+ assert_equal 1000, srtfile01.get_millisecs("00:00:01,000")
37
+ assert_equal 60000, srtfile01.get_millisecs("00:01:00")
38
+ assert_equal 600000, srtfile01.get_millisecs("00:10:00,000")
39
+ assert_equal 600001, srtfile01.get_millisecs("00:10:00,001")
40
+ assert_equal 600999, srtfile01.get_millisecs("00:10:00,999")
41
+ assert_equal 3600000, srtfile01.get_millisecs("01:00:00,000")
42
+ end
43
+ end
44
+
45
+ context "read a time-coded phrase from srt file" do
46
+ should "return time-coded phrases in transcript" do
47
+ srtfile01 = SmTranscript::SrtReader.from_file("results/ocw-18_02-f07-lec01_220k.srt")
48
+
49
+ # p srtfile01.words[0].start_time
50
+ # p srtfile01.words[0].end_time
51
+ # p srtfile01.words[0].word
52
+ assert_equal 22350, srtfile01.words[0].start_time
53
+ assert_equal 0, srtfile01.words[0].end_time
54
+ assert_equal "So let's start right away with stuff that we will need to see", srtfile01.words[0].word
55
+
56
+ assert_equal 63700, srtfile01.words[12].start_time
57
+ assert_equal 0, srtfile01.words[12].end_time
58
+ assert_equal "You will see it's pretty easy. So, just to remind you,", srtfile01.words[12].word
59
+
60
+ assert_equal 1027050, srtfile01.words[229].start_time
61
+ assert_equal 0, srtfile01.words[229].end_time
62
+ assert_equal "it's sometimes easy to forget. So, that's the definition.", srtfile01.words[229].word
63
+
64
+ assert_equal 1037757, srtfile01.words[231].start_time
65
+ assert_equal 0, srtfile01.words[231].end_time
66
+ assert_equal "that? That's kind of a strange", srtfile01.words[231].word
67
+
68
+ assert_equal 1618789, srtfile01.words[356].start_time
69
+ assert_equal 0000, srtfile01.words[356].end_time
70
+ assert_equal "And, I have a third point, R at (0,0,2),", srtfile01.words[356].word
71
+
72
+ assert_equal 1714785, srtfile01.words[381].start_time
73
+ assert_equal 0, srtfile01.words[381].end_time
74
+ assert_equal "divide by the lengths.", srtfile01.words[381].word
75
+
76
+ assert_equal 1754630, srtfile01.words[382].start_time
77
+ assert_equal 0, srtfile01.words[382].end_time
78
+ assert_equal "OK, so let's see. So, we said cosine theta is", srtfile01.words[382].word
79
+
80
+ srtfile02 = SmTranscript::SrtReader.from_file("results/3.091-04.srt")
81
+
82
+ # p srtfile02.words[0].start_time
83
+ # p srtfile02.words[0].end_time
84
+ # p srtfile02.words[0].word
85
+ assert_equal 0, srtfile02.words[0].start_time
86
+ assert_equal 0, srtfile02.words[0].end_time
87
+ assert_equal "Let's get started. A couple of announcements.", srtfile02.words[0].word
88
+
89
+ srtfile03 = SmTranscript::SrtReader.from_file("results/5.60-01.SRT")
90
+
91
+ # p srtfile03.words[0].start_time
92
+ # p srtfile03.words[0].end_time
93
+ # p srtfile03.words[0].word
94
+ assert_equal 0, srtfile03.words[0].start_time
95
+ assert_equal 0, srtfile03.words[0].end_time
96
+ assert_equal "The following content is provided under a Creative", srtfile03.words[0].word
97
+
98
+ srtfile04 = SmTranscript::SrtReader.from_file("results/18.06-03.srt")
99
+
100
+ # p srtfile04.words[0].start_time
101
+ # p srtfile04.words[0].end_time
102
+ # p srtfile04.words[0].word
103
+ assert_equal 7121, srtfile04.words[0].start_time
104
+ assert_equal 0, srtfile04.words[0].end_time
105
+ assert_equal "I've been multiplying matrices already, but certainly time for", srtfile04.words[0].word
106
+
107
+ # p srtfile04.words[200].start_time
108
+ # p srtfile04.words[0].end_time
109
+ # p srtfile04.words[200].word
110
+ assert_equal 818393, srtfile04.words[200].start_time
111
+ assert_equal 0, srtfile04.words[200].end_time
112
+ assert_equal "That product there I mean, when I'm just following", srtfile04.words[200].word
113
+
114
+ # p srtfile04.words[400].start_time
115
+ # p srtfile04.words[0].end_time
116
+ # p srtfile04.words[400].word
117
+ assert_equal 1622380, srtfile04.words[400].start_time
118
+ assert_equal 0, srtfile04.words[400].end_time
119
+ assert_equal "So can I get the identity matrix?", srtfile04.words[400].word
120
+
121
+ # p srtfile04.words[701].start_time
122
+ # p srtfile04.words[0].end_time
123
+ # p srtfile04.words[701].word
124
+ assert_equal 2720465, srtfile04.words[701].start_time
125
+ assert_equal 0, srtfile04.words[701].end_time
126
+ assert_equal "So E must be tells us what E is,", srtfile04.words[701].word
127
+
128
+ p srtfile04.words[722].start_time
129
+ p srtfile04.words[0].end_time
130
+ p srtfile04.words[722].word
131
+ assert_equal 2795208, srtfile04.words[722].start_time
132
+ assert_equal 0, srtfile04.words[722].end_time
133
+ assert_equal "See you on Wednesday.", srtfile04.words[722].word
134
+
135
+ p srtfile04.words[723].start_time
136
+
137
+
138
+ end
139
+ end
140
+
141
+ end
@@ -9,6 +9,7 @@ require 'shoulda'
9
9
  require_relative '../lib/sm_transcript/transcript'
10
10
  require_relative '../lib/sm_transcript/seg_reader'
11
11
  require_relative '../lib/sm_transcript/wrd_reader'
12
+ require_relative '../lib/sm_transcript/srt_reader'
12
13
  require_relative '../lib/sm_transcript/ttml_reader'
13
14
  require_relative '../lib/sm_transcript/word'
14
15
 
@@ -94,18 +95,19 @@ class TestTranscript < Test::Unit::TestCase
94
95
  end
95
96
  end
96
97
 
97
- context "Calling TtmlReader, writing transcript to HTML in default dest dir" do
98
+
99
+ context "Call TtmlReader, write transcript to TTML in default dest dir" do
98
100
 
99
101
  fname01 = '801-1stLecture.ttml.xml'
100
- fname02 = 'Audio-Open-The_New_Deal_for_Education.xml'
101
- fext = '-t1.html'
102
+ fname02 = '801-lect02-4731.xml'
103
+ fext = '-t1.ttml'
102
104
 
103
- should "create #{fname01}-t1.html file in ./transcripts" do
105
+ should "create #{fname01}#{fext} file in ./transcripts" do
104
106
  wrdfile = SmTranscript::TtmlReader.from_file("results/#{fname01}")
105
107
  t = SmTranscript::Transcript.new(wrdfile.words)
106
108
  assert_not_nil(t, "unable to create words array from ./results/#{fname01}")
107
109
  assert(t.words.length > 1, "@words array contains single phrase")
108
- t.write_html("transcripts/#{fname01}#{fext}")
110
+ t.write_ttml("transcripts/#{fname01}#{fext}")
109
111
 
110
112
  # for now just check for the existence of a file
111
113
  assert(File.exists?("transcripts/#{fname01}#{fext}"),
@@ -117,7 +119,7 @@ class TestTranscript < Test::Unit::TestCase
117
119
  t = SmTranscript::Transcript.new(wrdfile.words)
118
120
  assert_not_nil(t, "unable to create words array from ./results/#{fname02}")
119
121
  assert(t.words.length > 1, "@words array contains single phrase")
120
- t.write_html("transcripts/#{fname02}#{fext}")
122
+ t.write_ttml("transcripts/#{fname02}#{fext}")
121
123
 
122
124
  # for now just check for the existence of a file
123
125
  assert(File.exists?("transcripts/#{fname02}#{fext}"),
@@ -126,18 +128,19 @@ class TestTranscript < Test::Unit::TestCase
126
128
  end
127
129
 
128
130
 
129
- context "Call TtmlReader, write transcript to TTML in default dest dir" do
131
+ context "Calling SrtReader, write transcript to HTML in default dest dir" do
130
132
 
131
- fname01 = '801-1stLecture.ttml.xml'
132
- fname02 = '801-lect02-4731.xml'
133
- fext = '-t1.ttml'
133
+ fname01 = '3.091-04.srt'
134
+ fname02 = '5.60-01.SRT'
135
+ fname03 = 'ocw-18_02-f07-lec01_220k.srt'
136
+ fext = '-t1.html'
134
137
 
135
- should "create #{fname01}#{fext} file in ./transcripts" do
136
- wrdfile = SmTranscript::TtmlReader.from_file("results/#{fname01}")
138
+ should "create #{fname01}-t1.html file in ./transcripts" do
139
+ wrdfile = SmTranscript::SrtReader.from_file("results/#{fname01}")
137
140
  t = SmTranscript::Transcript.new(wrdfile.words)
138
141
  assert_not_nil(t, "unable to create words array from ./results/#{fname01}")
139
142
  assert(t.words.length > 1, "@words array contains single phrase")
140
- t.write_ttml("transcripts/#{fname01}#{fext}")
143
+ t.write_html("transcripts/#{fname01}#{fext}")
141
144
 
142
145
  # for now just check for the existence of a file
143
146
  assert(File.exists?("transcripts/#{fname01}#{fext}"),
@@ -145,15 +148,29 @@ class TestTranscript < Test::Unit::TestCase
145
148
  end
146
149
 
147
150
  should "create #{fname02}-t1.html file in ./transcripts" do
148
- wrdfile = SmTranscript::TtmlReader.from_file("results/#{fname02}")
151
+ wrdfile = SmTranscript::SrtReader.from_file("results/#{fname02}")
149
152
  t = SmTranscript::Transcript.new(wrdfile.words)
150
153
  assert_not_nil(t, "unable to create words array from ./results/#{fname02}")
151
154
  assert(t.words.length > 1, "@words array contains single phrase")
152
- t.write_ttml("transcripts/#{fname02}#{fext}")
155
+ t.write_html("transcripts/#{fname02}#{fext}")
153
156
 
154
157
  # for now just check for the existence of a file
155
158
  assert(File.exists?("transcripts/#{fname02}#{fext}"),
156
159
  "File not found: transcripts/#{fname02}#{fext}")
157
160
  end
161
+
162
+ should "create #{fname03}-t1.html file in ./transcripts" do
163
+ wrdfile = SmTranscript::SrtReader.from_file("results/#{fname03}")
164
+ t = SmTranscript::Transcript.new(wrdfile.words)
165
+ assert_not_nil(t, "unable to create words array from ./results/#{fname03}")
166
+ assert(t.words.length > 1, "@words array contains single phrase")
167
+ t.write_html("transcripts/#{fname03}#{fext}")
168
+
169
+ # for now just check for the existence of a file
170
+ assert(File.exists?("transcripts/#{fname03}#{fext}"),
171
+ "File not found: transcripts/#{fname03}#{fext}")
172
+ end
158
173
  end
174
+
175
+
159
176
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sm-transcript
3
3
  version: !ruby/object:Gem::Version
4
- hash: 19
4
+ hash: 17
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 6
10
- version: 0.0.6
9
+ - 7
10
+ version: 0.0.7
11
11
  platform: ruby
12
12
  authors:
13
13
  - Peter Wilkins
@@ -15,11 +15,11 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-07-19 00:00:00 -04:00
18
+ date: 2010-11-08 00:00:00 -05:00
19
19
  default_executable:
20
20
  dependencies: []
21
21
 
22
- description: "$Id: README.txt 196 2010-06-11 18:51:18Z pwilkins $\n\n\
22
+ description: "$Id: README.txt 200 2010-10-29 18:23:48Z pwilkins $\n\n\
23
23
  sm-transcript reads results of SLS processing and produces transcripts for\n\
24
24
  the SpokenMedia browser. For each file in the source folder whose extension \n\
25
25
  matches the source type, a file of destination type is created in the \n\
@@ -28,15 +28,15 @@ description: "$Id: README.txt 196 2010-06-11 18:51:18Z pwilkins $\n\n\
28
28
  command prompt in the examples is:\n\n\
29
29
  felix$ <command line>\n\n\
30
30
  If you are a Windows user, make the usual adjustments.\n\n\
31
- Requirements:\n sm-transcript is written in Ruby and packaged as a RubyGem. Since Ruby is\n not a compiled language, you will need to have Ruby installed on your \n machine to run sm-transcript. You can determine if Ruby is installed by \n typing \"ruby -v\" at a terminal prompt. It should return the version of \n Ruby that is installed. If Ruby is not installed on your machine, contact\n me (or your local Ruby wizard) for assistance.\n \n\
32
- Installation:\n You can get sm-transcript as either a RubyGem or as source from svn.\n \n The preferred way to install this package is as a Rubygem. You can \n download and install the gem with this command: \n \n felix$ sudo gem install [--verbose] sm-transcript\n \n This command downloads the most recent version of the gem from rubygems.org\n and makes it active. Previous versions of the gem remain installed, but \n are deactivated.\n \n You must use \"sudo\" to properly install the gem. If you execute \"gem \n install\" (omitting the \"sudo\") the gem is installed in your home gem \n repository and it isn't in your path without additional configuration.\n \n Note: You need sudo privileges to run the command as written. If you \n can't sudo, then you can install it locally and will need some additional\n configuration. Contact me (or your local Ruby wizard) for assistance. \n \n The executable is now in your path.\n \n You can cleanly uninstall the gem with this command:\n \n felix$ sudo gem uninstall sm-transcript \n \n If you have access to our svn repository, you are welcome to check out the \n code. Be warned that the trunk tip is not necessarily stable. It changes \n frequently as enhancements (and bug fixes) are added. (note that the\n 'smb_transcript' in the command line below is not a typo.)\n\n svn co svn+ssh://svn.mit.edu/oeit-tsa/SMB/smb_transcript/trunk sm_transcript\n \n build the gem by running this command from the directory you installed the \n source. This is what it looks like on my machine:\n \n felix$ rake gem\n \n The gem will be built and put in ./pkg You can now use the gem \n installation instructions above.\n \n\n\
33
- Using the App:\n Run with no command line parameters, the app reads *.wrd files out of \n ./results and writes *t1.html files to ./transcripts. These directories\n are relative to where sm_transcript is called.\n \n Note: destination files are overwritten without a warning prompt. If you \n want to preserve an existing output file, rename it before running the app\n again.\n \n For example, run the app by navigating to the bin folder and enter \n\n projects/sm_transcript/bin felix$ sm_transcript\n \n This command run from this folder will read *.wrd files from bin/results\n and write *-t1.html to bin/transcripts.\n \n Usage: sm_transcript [options] \n --srcdir PATH Read files from this folder (Default: ./results)\n --destdir PATH Write files to this folder (Default: ./transcripts)\n --srctype wrd | seg | txt | ttml Kind of file to process (Default: wrd)\n --desttype html | ttml | datajs | json Kind of file to output (Default: html)\n -h, --help Show this message \n\n\n\
34
- Troubleshooting:\n sm-transcript requires additional gems to operate. The RubyGem \n installation should install dependencies automatically, but when it \n doesn't, you get an error that includes \n \n ... no such file to load -- builder (LoadError)\n \n in the first few lines when you run sm-transcript, the problem is a \n missing dependent gem. (the error above indicates that the Builder \n gem is missing.) Try installing the missing gem. For the error above,\n the command looks like this on my computer:\n \n felix$ sudo gem install builder\n \n See \"Required Gems\" below for more information.\n \n \n A warning message such as:\n \n \"WARNING: Nokogiri was built against LibXML version 2.7.6, \n but has dynamically loaded 2.7.7\"\"\n \n may be safely ignored.\n \n \n\
31
+ Requirements:\n sm-transcript is written in Ruby and packaged as a RubyGem. Since Ruby is\n not a compiled language, you will need to have Ruby installed on your \n machine to run sm-transcript. You can determine if Ruby is installed by \n typing \"ruby -v\" at a terminal prompt. It should return the version of \n Ruby that is installed. If Ruby is not installed on your machine, navigate \n to http://www.ruby-lang.org/ and follow the installation instructions. \n sm-transcript was developed using Ruby 1.8. Other Ruby versions have not\n been tested as of this release. \n \n\
32
+ Installation:\n You can get sm-transcript as either a RubyGem or as source from svn. \n \n The preferred way to install this package is as a Rubygem. You can \n download and install the gem with this command: \n \n felix$ sudo gem install [--verbose] sm-transcript\n \n This command downloads the most recent version of the gem from rubygems.org\n and makes it active. Previous versions of the gem remain installed, but \n are deactivated.\n \n You must use \"sudo\" to properly install the gem. If you execute \"gem \n install\" (omitting the \"sudo\") the gem is installed in your home gem \n repository and it isn't in your path without additional configuration.\n \n Note: You need sudo privileges to run the command as written. If you \n can't sudo, then you can install it locally and will need some additional\n configuration. Contact me (or your local Ruby wizard) for assistance. \n \n The executable is now in your path.\n \n You can cleanly uninstall the gem with this command:\n \n felix$ sudo gem uninstall sm-transcript \n \n If you have access to our svn repository, you are welcome to check out the \n code. Be warned that the trunk tip is not necessarily stable. It changes \n frequently as enhancements (and bug fixes) are added. (note that the\n 'smb_transcript' in the command line below is not a typo.)\n\n svn co svn+ssh://svn.mit.edu/oeit-tsa/SMB/smb_transcript/trunk sm_transcript\n \n build the gem by running this command from the directory you installed the \n source. This is what it looks like on my machine:\n \n felix$ rake gem\n \n The gem will be built and put in ./pkg You can now use the gem \n installation instructions above.\n \n\n\
33
+ Using the App:\n Run with no command line parameters, the app reads *.wrd files out of \n ./results and writes *-t1.html files to ./transcripts. These directories\n are relative to where sm_transcript is called.\n \n Note: destination files are overwritten without a warning prompt. If you \n want to preserve an existing output file, rename it before running the app\n again.\n \n For example, run the app by navigating to the bin folder and enter \n\n projects/sm_transcript/bin felix$ sm_transcript\n \n This command run from this folder will read *.wrd files from bin/results\n and write *-t1.html to bin/transcripts.\n \n Usage: sm_transcript [options] \n --srcdir PATH Read files from this folder (Default: ./results)\n --destdir PATH Write files to this folder (Default: ./transcripts)\n --srctype wrd | seg | txt | ttml | srt Kind of file to process (Default: wrd)\n --desttype html | ttml | datajs | json Kind of file to output (Default: html)\n -h, --help Show this message \n\n There is a serious gotcha in specifying the srctype parameter: it must \n match the case of the file extension that you're processing. I know, \n I know; pretty lame. I will update the gem with a fix shortly. My \n apologies until then.\n\n\
34
+ Troubleshooting:\n sm-transcript requires additional gems to operate. The RubyGem \n installation should install dependencies automatically, but when it \n doesn't, you get an error that includes \n \n ... no such file to load -- builder (LoadError)\n \n in the first few lines when you run sm-transcript, the problem is a \n missing dependent gem. (the error above indicates that the Builder \n gem is missing.) Try installing the missing gem. For the error above,\n the command looks like this on my computer:\n \n felix$ sudo gem install builder\n \n See \"Required Gems\" below for more information.\n \n \n A warning message such as:\n \n \"WARNING: Nokogiri was built against LibXML version 2.7.6, \n but has dynamically loaded 2.7.7\"\"\n \n may be safely ignored.\n \n If you continue to have trouble, feel free to contact me.\n \n \n\
35
35
  Upgrading:\n You can easily upgrade by simply executing the same command you used to \n install the gem. Running install again will add the newer version and make\n it active. By default the most recent version is used, but older versions\n are still available, simply inactive.\n \n If you are using svn, you should already know what to do.\n \n \n\
36
- Required Gems:\n builder - create structured data, such as XML\n extensions - added for the 'require_relative' command. (To get this\n command in Ruby 1.8 you need to install this gem, for Ruby 1.9\n the command is already part of the core.)\n htmlentities - html parsing\n json - create JSON structured data\n optparse - option parsing of command line\n ostruct - open data structures\n ppcommand - pp is a pretty printer. It is used only for debugging\n rake - make for Ruby\n rubygems - support for gems (shouldn't be needed for Ruby 1.9)\n shoulda - enhancement for Test::Unit\n \n This command installs gems on OSX and Linux:\n felix$ sudo gem install <gem name>\n \n\
36
+ Required Gems:\n builder - create structured data, such as XML\n extensions - added for the 'require_relative' command. (To get this\n command in Ruby 1.8 you need to install this gem, for Ruby 1.9\n the command is already part of the core.)\n htmlentities - html parsing\n json - create JSON structured data\n nokogiri - xml parsing library\n optparse - option parsing of command line\n ostruct - open data structures\n ppcommand - pp is a pretty printer. It is used only for debugging\n rake - make for Ruby\n rubygems - support for gems (shouldn't be needed for Ruby 1.9)\n shoulda - enhancement for Test::Unit\n \n This command installs gems on OSX and Linux:\n felix$ sudo gem install <gem name>\n \n I recommend running the following command to update to latest version of\n rubygems before loading new gems.\n felix$ sudo gem update --system\n \n\
37
37
  Unit Tests:\n You may run all unit tests by navigating to the test folder and running \n rake with no parameters (the default rake task runs all tests). On my\n computer, it looks like this:\n\n projects/sm_transcript/test felix$ rake \n\n\n\
38
- Release Notes:\n Initial Version - runs under Ruby 1.8.x. \n version 0.0.4 - fixes bug when processing .WRD files with CRLF line\n endings.\n version 0.0.5 - added srctype of ttml and desttype of json, fixed bug\n where beginning time of word was actually for previous word.\n\n\
39
- To Do:\n specify individual files for processing rather than folders\n update code to run under Ruby 1.9\n\n\n "
38
+ Release Notes:\n Initial Version - runs under Ruby 1.8.x. \n version 0.0.4 - fixes bug when processing .WRD files with CRLF line\n endings.\n version 0.0.5 - removed due to posting error\n version 0.0.6 - added srctype of ttml and desttype of json, fixed bug\n where beginning time of word was actually for previous word.\n version 0.0.7 - added srt as srctype \n\n\
39
+ To Do:\n - specify individual files for processing rather than folders\n - fix bug in srt processing: can't read Creole srt content.\n - allow user to modify the \"t1\" file extension for additional languages of \n the same transcript.\n - update code to run under Ruby 1.9\n\n\n "
40
40
  email: pwilkins@mit.edu
41
41
  executables:
42
42
  - sm-transcript
@@ -50,7 +50,9 @@ files:
50
50
  - lib/sm_transcript/metadata_reader.rb
51
51
  - lib/sm_transcript/options.rb
52
52
  - lib/sm_transcript/runner.rb
53
+ - lib/sm_transcript/sbv_reader.rb
53
54
  - lib/sm_transcript/seg_reader.rb
55
+ - lib/sm_transcript/srt_reader.rb
54
56
  - lib/sm_transcript/transcript.rb
55
57
  - lib/sm_transcript/ttml_reader.rb
56
58
  - lib/sm_transcript/word.rb
@@ -58,16 +60,25 @@ files:
58
60
  - bin/sm-transcript
59
61
  - bin/results/PLACEHOLDER.txt
60
62
  - bin/transcripts/PLACEHOLDER.txt
63
+ - test/log.txt
64
+ - test/processed_srts.zip
61
65
  - test/Rakefile
66
+ - test/srt_transcripts.zip
62
67
  - test/test_metadata.rb
63
68
  - test/test_metadatareader.rb
64
69
  - test/test_options.rb
65
70
  - test/test_runner.rb
66
71
  - test/test_segreader.rb
72
+ - test/test_srtreader.rb
67
73
  - test/test_transcript.rb
68
74
  - test/test_ttmlreader.rb
69
75
  - test/test_wrdreader.rb
70
76
  - test/results/18.03-2004-L01.align2.wrd
77
+ - test/results/18.06-03.srt
78
+ - test/results/20101018 OCW-18.01-f07-lec02_300k - Haitian Creole.srt
79
+ - test/results/3.091-04.srt
80
+ - test/results/5.60-01.SRT
81
+ - test/results/7.012-01.srt
71
82
  - test/results/8.01-1999-L01.wrd
72
83
  - test/results/801-1stLecture.ttml.xml
73
84
  - test/results/801-lect01-4730.xml
@@ -82,7 +93,12 @@ files:
82
93
  - test/results/Audio-Open-The_New_Deal_for_Education.xml
83
94
  - test/results/GardnerRileyInterview.wrd
84
95
  - test/results/IIHS_Diane_Davis_Nov2009.seg
96
+ - test/results/lec1-edit.srt
97
+ - test/results/lec1.srt
85
98
  - test/results/NERCOMP-SpokenMedia4.wrd
99
+ - test/results/ocw-18_02-f07-lec01_220k.srt
100
+ - test/results/ocw-5.111-f08-lec01_300k.SRT
101
+ - test/results/ocw-8.03-f04-lec01-recognizer.wrd
86
102
  - test/results/PLACEHOLDER.txt
87
103
  - test/results/PLACEHOLDER.txt.ignore
88
104
  - test/results/vijay_kumar.wrd
@@ -132,6 +148,7 @@ test_files:
132
148
  - test/test_options.rb
133
149
  - test/test_runner.rb
134
150
  - test/test_segreader.rb
151
+ - test/test_srtreader.rb
135
152
  - test/test_transcript.rb
136
153
  - test/test_ttmlreader.rb
137
154
  - test/test_wrdreader.rb