srtparser_library 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/srtparser_library.rb +93 -0
- metadata +44 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 8271d16a39c370c343f86ea4867ec09d365c2d9b
|
4
|
+
data.tar.gz: 137cf28605fbff9cfaea9f5ffe584b8d77ff5ef1
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: cd2e0c13a5bb62c267d2cb16b48e6e72db26e84d648b870e4fc8d17062b883d8c52e0eccb27c4705688c20f075ad9086021b7658bbdc1cc015ae16330082f046
|
7
|
+
data.tar.gz: 159525b3a4a768497cde35af998199cfc67d8c875a3567d38aa3cc5b370d318c91bda319be4616c789c5b4eafed345bd75deef418990311fbf0906c65d9063cc
|
@@ -0,0 +1,93 @@
|
|
1
|
+
# Collects simple statistics (word, symbol, line and sentence counts, total
# duration and a few averages) from a SubRip (.srt) subtitle file.
#
# Usage:
#   stats = SRTParse.new.parse_file("movie.srt")
#   stats["number_of_words"]
class SRTParse
  # Characters counted as "symbols" in a text line.
  SYMBOL_PATTERN = /[~!@\#$%^&*()\-{}\[\]|”:><?\/]/.freeze

  # A line consisting only of whitespace, with or without a line terminator.
  BLANK_LINE = /\A\s*\z/.freeze

  # One sentence: a run of non-terminators followed by '.', '!' or '?'.
  SENTENCE_PATTERN = /[^\.!?]+[\.!?]/.freeze

  # Adds the average values to an already populated results hash.
  #
  # Writes "average_symbols_per_line", "average_symbols_per_sentence" and
  # "average_duration", each rounded to two decimals. An average whose
  # denominator is zero is stored as 0.0 (e.g. for an empty file or text
  # without sentence-ending punctuation) instead of raising
  # ZeroDivisionError or yielding NaN/Infinity.
  #
  # @param results [Hash] counters keyed by string; modified in place
  # @return [Hash] the same hash with the averages added
  def find_avarages(results)
    results["average_symbols_per_line"] =
      safe_average(results["number_of_symbols"], results["number_of_lines"])
    results["average_symbols_per_sentence"] =
      safe_average(results["number_of_symbols"], results["number_of_sentences"])
    results["average_duration"] =
      safe_average(results["duration"], results["number_of_subtitles"])
    results
  end

  # Converts one component of a timestamp to seconds.
  #
  # @param time [String, nil] the textual component (e.g. "01" or "04.500")
  # @param current_type [String] "hours", "minutes" or "seconds"
  # @return [Integer, Float, String] seconds; Integer for hours/minutes,
  #   Float for seconds, and the string "ERR" for any other type
  def to_sec(time, current_type)
    case current_type
    when "hours"   then time.to_i * 3600
    when "minutes" then time.to_i * 60
    when "seconds" then time.to_f
    else "ERR"
    end
  end

  # Returns the end time, in seconds, of a timing line such as
  # "00:00:01,000 --> 00:00:04,000".
  #
  # Only the part after the last "-->" is used; the decimal comma is
  # replaced by a dot so the seconds component parses as a Float.
  #
  # @param line [String] the timing line
  # @return [Float] end time in seconds, rounded to two decimals
  def duration(line)
    end_stamp = line.split("-->").last
    hours, minutes, seconds = end_stamp.tr(",", ".").split(":")
    total = to_sec(hours, "hours") + to_sec(minutes, "minutes") + to_sec(seconds, "seconds")
    total.round(2)
  end

  # Returns the larger of the symbol count of +line+ and +max_symbols+.
  #
  # @param line [String] a subtitle text line
  # @param max_symbols [Integer] the largest count seen so far
  # @return [Integer] the updated maximum
  def max_symbols_per_line(line, max_symbols)
    [line.scan(SYMBOL_PATTERN).count, max_symbols].max
  end

  # Reads the file line by line and gathers the statistics.
  #
  # Within each group of non-blank lines, the first line is taken as the
  # subtitle number, the second as the timing line and all further lines
  # as text. "number_of_subtitles" and "duration" are overwritten on every
  # group, so they end up holding the values of the last one.
  #
  # @param path_to_file [String] path to the .srt file
  # @return [Hash] string-keyed counters (missing keys default to 0)
  #   including the averages added by #find_avarages
  def parse_file(path_to_file)
    results = Hash.new(0)
    lines_after_blank = 0

    File.open(path_to_file, "r") do |infile|
      infile.each_line do |line|
        # A blank line ends the current group. Matching without requiring a
        # trailing newline keeps a whitespace-only final line from being
        # mistaken for a subtitle number.
        if line =~ BLANK_LINE
          lines_after_blank = 0
          next
        end

        case lines_after_blank
        when 0 then results["number_of_subtitles"] = line.to_i
        when 1 then results["duration"] = duration(line)
        else tally_text_line(results, line)
        end

        lines_after_blank += 1
      end
    end

    find_avarages(results)
  end

  private

  # Divides total by count as floats, rounded to two decimals; 0.0 when
  # count is zero.
  def safe_average(total, count)
    return 0.0 if count.zero?

    (total.to_f / count).round(2)
  end

  # Updates the word, symbol, line, sentence and max-symbol counters for
  # one text line.
  def tally_text_line(results, line)
    results["number_of_words"] += line.gsub(/[[:punct:]]/, '').split.length
    results["number_of_symbols"] += line.scan(SYMBOL_PATTERN).count
    results["number_of_lines"] += 1
    results["max_symbols_per_line"] = max_symbols_per_line(line, results["max_symbols_per_line"])
    results["number_of_sentences"] += line.scan(SENTENCE_PATTERN).count
  end
end
|
metadata
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: srtparser_library
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Boris Mutafov
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-11-01 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: srtparser_library - Parsing SRT Files.
|
14
|
+
email: mutafow@gmail.com
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- lib/srtparser_library.rb
|
20
|
+
homepage: http://rubygems.org/gems/srtparser_library
|
21
|
+
licenses:
|
22
|
+
- MIT
|
23
|
+
metadata: {}
|
24
|
+
post_install_message:
|
25
|
+
rdoc_options: []
|
26
|
+
require_paths:
|
27
|
+
- lib
|
28
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
29
|
+
requirements:
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
34
|
+
requirements:
|
35
|
+
- - ">="
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
requirements: []
|
39
|
+
rubyforge_project:
|
40
|
+
rubygems_version: 2.5.1
|
41
|
+
signing_key:
|
42
|
+
specification_version: 4
|
43
|
+
summary: srtparser_library
|
44
|
+
test_files: []
|