docfolio 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,10 @@
1
+ require_relative 'dates'
2
+ require_relative 'times'
3
+
4
+ # @param [Array] time_array updated start and end times in hours and minutes
5
+ # @param [Array] times_and_dates current start and end times and date
6
+ # @return [Array] returns the new times_and_dates array for use going forwards
7
# Convenience namespace that mixes in both the Dates and the Times modules.
module DateTimes
  # A single include call appends its arguments in reverse order, so this
  # yields the same ancestor chain as including Times first and Dates second.
  include Dates, Times
end
@@ -0,0 +1,169 @@
1
+ # convert the date from 'dd-Oct-yy' to seconds past UNIX epoch
2
+ # accepts dd-Mmm-yy dd-Mmmmmmm-yy dd-MMM-yy and other similar
3
module Dates
  # Extracts a date in seconds past UNIX epoch from a string date. The result
  # can be used for other date operations. Converts from dd-mmm-yy and similar
  # hyphen-separated formats as commonly found in csv files.
  class DateFormatter
    # A hash of text months and their corresponding month number
    MONTHS = {
      'jan' => 1,
      'feb' => 2,
      'mar' => 3,
      'apr' => 4,
      'may' => 5,
      'jun' => 6,
      'jul' => 7,
      'aug' => 8,
      'sep' => 9,
      'oct' => 10,
      'nov' => 11,
      'dec' => 12,
      'january' => 1,
      'february' => 2,
      'march' => 3,
      'april' => 4,
      'june' => 6,
      'july' => 7,
      'august' => 8,
      'september' => 9,
      'october' => 10,
      'november' => 11,
      'december' => 12,
      'sept' => 9
    }.freeze

    # Converts a hyphen-separated date string to seconds past the UNIX epoch
    # (midnight, local time, of that day).
    # @param [String] date A date such as '12-Oct-14', '12-10-2014' or
    #   '12-October-2014'
    # @return [Integer, nil] seconds past the epoch, or nil when the month is
    #   not recognised or Time.new rejects the components. In the nil case a
    #   diagnostic is printed to standard output.
    def format_date(date)
      day, month, year = components(date)
      begin
        # Time.new treats a nil month as January, which would silently produce
        # a wrong date, so an unknown month is reported like any other
        # invalid component.
        raise ArgumentError, 'unrecognised month' if month.nil?
        Time.new(year, month, day).to_i
      rescue ArgumentError => e
        print_argument_error_msg(e, date, day, month, year)
        nil
      end
    end

    private

    # Prints details of a date that could not be converted.
    # @param [ArgumentError] e the error that was rescued
    # @param [String] date the original date string
    # @param [Integer] day the parsed day of month
    # @param [Integer, nil] month the parsed month number
    # @param [Integer] year the parsed year
    def print_argument_error_msg(e, date, day, month, year)
      puts "\n#{e.to_s.upcase}"
      puts "date : #{date.inspect}"
      puts "day : #{day}"
      puts "month : #{month}"
      puts "year : #{year}"
      puts e.backtrace
    end

    # Splits date into its component day, month and year
    # @param [String] date hyphen-separated date string
    # @return [Array] [day, month, year]; month is nil if it is not recognised
    def components(date)
      parts = date.to_s.split('-')
      day = parts[0].to_i
      month = convert_month_to_number(parts[1])
      year = add_century(parts[2].to_i)
      [day, month, year]
    end

    # Expands a two digit year to four digits. Two digit years later than the
    # current two digit year are taken to be in the 1900s, all others in the
    # 2000s. Years of 100 or more are returned unchanged.
    # @param [Integer] year the year as parsed from the date string
    # @return [Integer] the year including century
    def add_century(year)
      return year unless year < 100 # century already present
      year + (year > Time.now.year % 100 ? 1900 : 2000)
    end

    # Takes a text month to its corresponding number, case insensitive
    # @param [String, nil] month Month of year as text or digits
    # @return [Integer, nil] Number of month in calendar e.g. Feb is 2, or nil
    #   if the text is not a known month
    def convert_month_to_number(month)
      text = month.to_s.strip
      number = text.to_i
      return number if number > 0 # already a number
      MONTHS[text.downcase]
    end
  end

  # Finds date and time information at the start of a paragraph and separates
  # it from the remaining text.
  class DateExtractor
    # Regular expression matching an optional time period and an optional
    # date, in either order, at the start of a line. Because named groups are
    # used, only the named groups capture; they are numbered 1 to 26 in order
    # of appearance and read by number in #date_from_globals.
    DATE_REGEX = begin
      dy = /(?<day>\d{1,2})/
      mt = /(?<month>\w+)/
      yr = /(?<year>\d{2,4})/
      time = /(?<hour>\d{1,2}):(?<min>\d{2})/
      period = /#{time}( ?(?:-|–|to) ?#{time})?/
      date1 = %r{#{dy}/#{dy}/#{yr}} # d/m/y
      date2 = /#{dy},? #{mt},? #{yr}/ # d Month Year
      date3 = /#{mt},? #{dy},? #{yr}/ # Month d Year
      date = /#{date1}|#{date2}|#{date3}/
      /^(#{period} ?#{date}?|#{date} ?#{period}?)/
    end

    # Capture group numbers for each component. The first number(s) belong to
    # the 'period then date' alternative, the later ones to 'date then period'.
    FROM_HOUR_GROUPS = [1, 23].freeze
    FROM_MIN_GROUPS = [2, 24].freeze
    TO_HOUR_GROUPS = [3, 25].freeze
    TO_MIN_GROUPS = [4, 26].freeze
    DAY_GROUPS = [5, 8, 12, 14, 17, 21].freeze
    MONTH_GROUPS = [6, 9, 11, 15, 18, 20].freeze
    YEAR_GROUPS = [7, 10, 13, 16, 19, 22].freeze

    # Removes leading date/time information from a paragraph. The MatchData
    # object is used directly, so the 'English' library is not needed.
    # @param [String] paragraph_text a paragraph from a DSL text file
    # @param [Time] date Date of this paragraph. May be nil if not known.
    # @return [Array<String, Array, Time>] Array of values to be returned
    #   [String] the paragraph without the matched date/time characters and
    #     stripped of surrounding whitespace, or the paragraph unchanged if
    #     nothing matched.
    #   [Array] time_array: from hour, from minute, to hour, to minute as
    #     captured strings (nil where absent); empty if nothing matched.
    #   [Time] the date found in the paragraph, otherwise the date passed in.
    def extract_date(paragraph_text, date)
      time_array = []
      match = date_regex.match(paragraph_text)
      if match
        # everything after the matched date/time is the remaining content
        paragraph_text = match.post_match.strip
        time_array, date = date_from_globals(match, date)
      end
      [paragraph_text, time_array, date]
    end

    private

    # Returns the first non-nil capture among the given group numbers.
    # @param [Array] i_a Array of integers representing positions to test in
    #   glob_a
    # @param [MatchData] glob_a the match whose captures are inspected
    # @return [String, nil] the first capture that is not nil, or nil if all
    #   of the captures at the indices in i_a are nil.
    def glob(i_a, glob_a)
      index = i_a.find { |n| !glob_a[n].nil? }
      index && glob_a[index]
    end

    # Reads the times and the date from the 26 captures of the date regex.
    # @param [MatchData] glob_a the MatchData object returned when the date
    #   regex was matched to the paragraph
    # @param [Time] date the date of the paragraph; may be nil if not known
    # @return [Array] two elements: the array [from_hour, from_min, to_hour,
    #   to_min] of captured strings or nils, and the paragraph date. The date
    #   passed in is kept when no date was matched or the matched date could
    #   not be converted.
    def date_from_globals(glob_a, date)
      times = [FROM_HOUR_GROUPS, FROM_MIN_GROUPS, TO_HOUR_GROUPS,
               TO_MIN_GROUPS].map { |groups| glob(groups, glob_a) }
      day = glob(DAY_GROUPS, glob_a)
      unless day.nil?
        month = glob(MONTH_GROUPS, glob_a)
        year = glob(YEAR_GROUPS, glob_a)
        seconds = DateFormatter.new.format_date("#{day}-#{month}-#{year}")
        # format_date returns nil for an invalid date; Time.at(nil) would raise
        date = Time.at(seconds) unless seconds.nil?
      end
      [times, date]
    end

    # Returns a regular expression to be used to match dates and times of
    # the paragraph.
    # @return [Regexp] a regular expression to use to match dates and times
    #   in the paragraph
    def date_regex
      DATE_REGEX
    end
  end
end
@@ -0,0 +1,194 @@
1
+ require 'English'
2
+
3
+ # handles extraction of tagged or other significant content.
4
module TaggedContent
  # Splits paragraph text into tagged content elements of the form
  # [:TAG, 'content'] and accumulates them in #tags. Text without a recognised
  # tag is tagged with the current document section instead.
  class TagExtractor
    # Tags that are used in significant events
    SIG_EVENT = [:SEA, :INVOLVED, :WHAT, :WHY, :FEELINGS, :WELL, :DIFFERENT,
                 :CHANGE, :CHANGED].freeze

    # Tags that are used in learning logs
    LEARNING_LOG = [:LP, :R, :DEN, :NOTE, :I].freeze

    # Tags that are part of the DSL and are recognized in text
    TAGS = (LEARNING_LOG + SIG_EVENT).freeze

    # Tags that can earn CPD credit
    CREDITABLE = [:LP, :R, :I, :HF, :WW, :WD].freeze

    # Tags that are applied to content based on the position in the document
    SECTIONS = [:TITLE, :INTRO, :NOTE].freeze

    # @return [Array] tagged content collected so far, elements of the form
    #   [:tag, 'content']
    attr_reader :tags

    def initialize
      @tags = []
    end

    # Declaration of the class instance variable 'section'. The section is an
    # index into SECTIONS and selects the tag applied to content if no user
    # tag is detected. It changes depending on the position in the document.
    class << self
      attr_accessor :section
    end

    # Start at :TITLE so the extractor is usable even if reset has not been
    # called yet (a nil section cannot index SECTIONS).
    @section = 0

    # Resets the class instance variable so that a new file can be parsed.
    def self.reset
      TagExtractor.section = 0 # :TITLE
    end

    # @return [Array] every section tag followed by every DSL tag
    def self.all_tags
      SECTIONS + TAGS
    end

    # Tags the given text and appends the result to #tags. Does nothing for
    # nil or empty text.
    # @param [String] rest_of_str paragraph text after date/time removal
    def extract_content(rest_of_str)
      # nothing left to tag
      return if rest_of_str.nil? || rest_of_str == ''

      found = extract_tags(rest_of_str)
      @tags += found

      if found.empty?
        # No tags have been extracted from the str, so use the paragraph's
        # current section
        tag_as_section(rest_of_str)
      else
        # As soon as tags are extracted, untagged paragraphs can only be
        # notes
        TagExtractor.section = 2 # :NOTE
      end
    end

    # @param [Symbol] tag The tag to look for
    # @return [Boolean] true if any collected element has this tag
    def tag?(tag)
      @tags.any? { |t| t[0] == tag }
    end

    # Joins all content in @tags that has a tag of type tag. Each piece of
    # content is followed by a single space.
    # @param [Symbol] tag The tag for which content should be selected.
    # @param [String] str An optional string to which the selected content
    #   is appended in place.
    # @return [String] str with the selected content appended
    def content(tag, str = '')
      @tags.each do |found_tag, text|
        str << text + ' ' if found_tag == tag
      end
      str
    end

    # true if the paragraph contains a tag that can earn credit
    def creditable?
      tags.any? { |t| CREDITABLE.include?(t[0]) }
    end

    # true if the paragraph contains a tag used in significant events
    def significant_event?
      tags.any? { |t| SIG_EVENT.include?(t[0]) }
    end

    # true if the paragraph contains a tag that can earn impact credit
    def impact_creditable?
      tag?(:I)
    end

    private

    # Extracts a paragraph string to a tagged array with elements of type
    # [:tag, 'content'].
    # @param [String] paragraph_text Paragraph string after date info removed
    # @return [Array] Tagged array, empty if the text contains no DSL tag
    def extract_tags(paragraph_text)
      tag_regex =~ paragraph_text ? extract_tag(paragraph_text) : []
    end

    # Adds a single tagged content element [tag, p] to @tags. Moves the
    # section class instance variable on by one (to :INTRO) if it is at
    # position 0 (:TITLE).
    # @param [Symbol] tag the tag to use
    # @param [String] p the content to tag
    def tag_it(tag, p)
      @tags << [tag, p]
      TagExtractor.section += 1 if TagExtractor.section == 0 && SECTIONS.count > 1
    end

    # Creates a regex that matches any tag in TAGS followed by a colon and an
    # optional space. The tag name is captured so that String#split keeps it.
    def tag_regex
      /\b(#{ TAGS.join '|' }): ?/
    end

    # Tags any text that precedes the first tag as a :NOTE.
    # @param [Array] a Array of strings from splitting the paragraph; the
    #   first element is the text before the first tag (possibly empty)
    # @return [Array] [[:NOTE, text]] or [] if there was no leading text
    def preface_with_note(a)
      str = a[0].strip
      str == '' ? [] : [[:NOTE, str]]
    end

    # Turns the split paragraph into a tagged array. Ignores the string at
    # position [0], which is handled by #preface_with_note. The remaining
    # strings alternate tag name, content; a trailing tag name with no
    # content after it is dropped.
    # @param [Array] a Array of strings from splitting the paragraph
    # @return [Array] A tagged array with elements of the form
    #   [:tag, 'content']
    def tags_array(a)
      complete_pairs = a.drop(1).each_slice(2).select { |pair| pair.size == 2 }
      complete_pairs.map { |name, text| [name.to_sym, text.strip] }
    end

    # Splits the paragraph into an array of tags of type [:tag, 'content']
    # @param [String] paragraph_text String text of the paragraph
    # @return [Array] Tagged array of content with elements of type
    #   [:tag, 'content']
    def extract_tag(paragraph_text)
      a = paragraph_text.split(tag_regex)
      preface_with_note(a) + tags_array(a)
    end

    # Adds str to @tags, tagged with the current section.
    # @param [String] str the content to tag
    def tag_as_section(str)
      tag_it(section_tag, str)
    end

    # @return [Symbol] the SECTIONS entry selected by the current section
    def section_tag
      SECTIONS[TagExtractor.section]
    end
  end
end
@@ -0,0 +1,74 @@
1
module Times
  # Processes new times against the current start time, end time and date,
  # which are held in class instance variables.
  class TimeProcesser
    # Length of one day in seconds
    SECONDS_PER_DAY = 86_400

    # Updates the class-level start and end times from the given hours and
    # minutes, measured from TimeProcesser.date. If the end time falls before
    # the start time it is assumed to be on the following day: a day is added
    # to the end time and to TimeProcesser.date.
    # @param [Array] new_times An array containing the from hour, from min,
    #   to hour, to min. Elements may be nil; a nil hour leaves the
    #   corresponding time unchanged.
    def process_times(new_times)
      f_hour, f_min, t_hour, t_min = new_times
      to_start_time(f_hour, f_min) if has f_hour
      to_end_time(t_hour, t_min) if has t_hour
    end

    # st: current start time, et: current end time, date: current date
    class << self
      attr_accessor :st, :et, :date
    end

    # Resets the class instance variables (date, start time and end time) to
    # nil so that a new file can be parsed.
    def self.reset
      TimeProcesser.date = TimeProcesser.st = TimeProcesser.et = nil
    end

    private

    # Sets the end time to the current date plus t_hour and t_min. If the end
    # time is before the start time, assumes the end time is for the next day
    # and adds a day. When no start time is known there is nothing to compare
    # against, so the end time is left as calculated.
    # @param [Number] t_hour To hours
    # @param [Number] t_min To minutes
    def to_end_time(t_hour, t_min)
      TimeProcesser.et = extract_time_object(t_hour, t_min, TimeProcesser.date)
      start = TimeProcesser.st
      # comparing a Time with nil raises, hence the guard on start
      TimeProcesser.et += a_day if start && TimeProcesser.et < start
    end

    # Sets the start time to the current date plus hour and min. The end time
    # is not altered here.
    # NOTE(review): an earlier comment said this also advanced the day and
    # cleared the end time; the code does neither - confirm which is intended.
    # @param [Number] hour From hours
    # @param [Number] min From minutes
    def to_start_time(hour, min)
      TimeProcesser.st = extract_time_object(hour, min, TimeProcesser.date)
    end

    # Improves readability of boolean condition statements.
    # @param [Object] time_date_component Any object
    # @return [Boolean] true if the parameter is not nil
    def has(time_date_component)
      !time_date_component.nil?
    end

    # Advances TimeProcesser.date by one day and returns one day in seconds.
    # @raise [RuntimeError] if TimeProcesser.date is nil
    # @return [Integer] the number of seconds in a day
    def a_day
      fail('needs date') if TimeProcesser.date.nil?
      TimeProcesser.date += SECONDS_PER_DAY
      SECONDS_PER_DAY
    end

    # Adds a given number of hours and minutes to a Time object
    # @param [Number] hour hours to add to from
    # @param [Number] min minutes to add to from
    # @param [Time] from time to which hours and minutes are added; nil is
    #   converted with to_i and so counts as the epoch
    # @return [Time] the result of hours and minutes after from
    def extract_time_object(hour, min, from)
      seconds = (hour.to_i * 3600) + (min.to_i * 60)
      Time.at(from.to_i + seconds)
    end
  end
end