docfolio 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ require_relative 'dates'
2
+ require_relative 'times'
3
+
4
# Umbrella mixin: including DateTimes mixes in both the Times and the Dates
# modules. The module defines no methods of its own.
module DateTimes
  # A single include call appends its arguments in reverse order, so this
  # produces the same ancestor chain (DateTimes, Dates, Times) as including
  # Times first and Dates second in two separate statements.
  include Dates, Times
end
@@ -0,0 +1,169 @@
1
+ # Converts a date such as 'dd-Oct-yy' to seconds past the UNIX epoch.
2
+ # Accepts dd-Mmm-yy, dd-Mmmmmmm-yy, dd-MMM-yy and other similar formats.
3
+ module Dates
4
# Extracts a date, in seconds past the UNIX epoch, from a string date. The
# result can be used for other date operations. Converts from dd-mmm-yy and
# similar dash-separated formats as commonly found in csv files.
class DateFormatter
  # Lower-case text months and their corresponding month number.
  MONTHS = {
    'jan' => 1,
    'feb' => 2,
    'mar' => 3,
    'apr' => 4,
    'may' => 5,
    'jun' => 6,
    'jul' => 7,
    'aug' => 8,
    'sep' => 9,
    'oct' => 10,
    'nov' => 11,
    'dec' => 12,
    'january' => 1,
    'february' => 2,
    'march' => 3,
    'april' => 4,
    'june' => 6,
    'july' => 7,
    'august' => 8,
    'september' => 9,
    'october' => 10,
    'november' => 11,
    'december' => 12,
    'sept' => 9
  }.freeze

  # Converts a dash-separated date string into seconds past the UNIX epoch.
  # @param [String] date the date as day-month-year, e.g. '05-Oct-15'; the
  #   month may be a number or a (case-insensitive) month name
  # @return [Integer, nil] seconds past the epoch, or nil when Time.new
  #   rejects the components with an ArgumentError (a diagnostic message is
  #   printed in that case)
  def format_date(date)
    day, month, year = components(date)
    begin
      Time.new(year, month, day).to_i
    rescue ArgumentError => e
      # The components are passed explicitly: they are locals of this method
      # and are not visible from inside the reporting helper.
      print_argument_error_msg(e, date, day, month, year)
      nil
    end
  end

  private

  # Prints the error together with the date components that caused it.
  # @param [ArgumentError] e the error raised by Time.new
  # @param [String] date the date string originally passed to #format_date
  # @param [Integer] day the parsed day
  # @param [Integer, nil] month the parsed month number
  # @param [Integer] year the parsed year
  def print_argument_error_msg(e, date, day, month, year)
    puts "\n#{e.to_s.upcase}"
    puts "date : #{date.inspect}"
    puts "day : #{day}"
    puts "month : #{month}"
    puts "year : #{year}"
    puts e.backtrace
  end

  # Splits the date into its component day, month and year.
  # @param [String] date the dash-separated date string
  # @return [Array] the day, the month number (nil when the month name is
  #   not recognised) and the year including the century
  def components(date)
    day, month, year = date.to_s.split('-')
    year = year.to_i
    year += century_for(year) if year < 100 # no century given
    [day.to_i, convert_month_to_number(month), year]
  end

  # Chooses the century for a two digit year: years later than the current
  # two digit year are taken to be in the 1900s, all others in the 2000s.
  # @param [Integer] two_digit_year a year in the range 0..99
  # @return [Integer] 1900 or 2000
  def century_for(two_digit_year)
    two_digit_year > Time.now.year % 100 ? 1900 : 2000
  end

  # Takes a text month to its corresponding number, case insensitive.
  # NOTE(review): an unrecognised month name yields nil; Time.new appears to
  # treat a nil month as January, so such input is not rejected - confirm
  # whether that is intended.
  # @param [String, nil] month Month of year as text or as a number
  # @return [Integer, nil] Number of month in calendar e.g. Feb is 2
  def convert_month_to_number(month)
    number = month.to_i # nil.to_i and 'Oct'.to_i are both 0
    return number if number > 0 # already a number

    MONTHS[month.to_s.strip.downcase]
  end
end
81
+
82
# Detects a date and/or a time period at the beginning of a paragraph and
# separates it from the remaining paragraph text.
class DateExtractor
  # Capture group numbers in the MatchData produced by DATE_REGEX. Because
  # the regex uses named groups only, groups are numbered in order of
  # appearance: 1-13 belong to the "time first" alternative and 14-26 to
  # the "date first" alternative.
  FROM_HOUR_GROUPS = [1, 23].freeze
  FROM_MIN_GROUPS = [2, 24].freeze
  TO_HOUR_GROUPS = [3, 25].freeze
  TO_MIN_GROUPS = [4, 26].freeze
  DAY_GROUPS = [5, 8, 12, 14, 17, 21].freeze
  MONTH_GROUPS = [6, 9, 11, 15, 18, 20].freeze
  YEAR_GROUPS = [7, 10, 13, 16, 19, 22].freeze

  # Regular expression matching "period date", "period", "date period" or
  # "date" at the start of a line. Built once rather than on every call.
  # NOTE(review): '^' matches at the start of any line, not only at the
  # start of the string - confirm paragraphs are always single-line.
  DATE_REGEX = begin
    dy = /(?<day>\d{1,2})/
    mt = /(?<month>\w+)/
    yr = /(?<year>\d{2,4})/
    time = /(?<hour>\d{1,2}):(?<min>\d{2})/
    period = /#{time}( ?(?:-|–|to) ?#{time})?/
    date1 = %r{#{dy}/#{dy}/#{yr}} # d/m/y
    date2 = /#{dy},? #{mt},? #{yr}/ # d Month Year
    date3 = /#{mt},? #{dy},? #{yr}/ # Month d Year
    date = /#{date1}|#{date2}|#{date3}/
    /^(#{period} ?#{date}?|#{date} ?#{period}?)/
  end

  private_constant :FROM_HOUR_GROUPS, :FROM_MIN_GROUPS, :TO_HOUR_GROUPS,
                   :TO_MIN_GROUPS, :DAY_GROUPS, :MONTH_GROUPS, :YEAR_GROUPS,
                   :DATE_REGEX

  # Splits leading date/time information from a paragraph. The MatchData
  # returned by Regexp#match is used directly, so this does not depend on
  # the 'English' library having been loaded.
  # @param [String] paragraph_text a paragraph from a DSL text file
  # @param [Time] date Date of this paragraph. May be nil if not known.
  # @return [Array<String, Array, Time>] three values:
  #   'paragraph_text' - when a date/time was matched, the text after the
  #   match with surrounding whitespace stripped; otherwise the text
  #   exactly as it was passed in.
  #   'time_array' - from hour, from minute, to hour and to minute as
  #   captured by the regex (strings, or nil where absent); an empty array
  #   when nothing was matched.
  #   'date' - the date taken from the match if it contained one that could
  #   be converted; otherwise the date that was passed in (may be nil).
  def extract_date(paragraph_text, date)
    match = date_regex.match(paragraph_text)
    return [paragraph_text, [], date] if match.nil?

    time_array, date = date_from_globals(match, date)
    [match.post_match.strip, time_array, date]
  end

  private

  # Returns the first non-nil capture found at the given group numbers.
  # @param [Array] i_a Array of integers representing positions to test in
  #   glob_a
  # @param [MatchData] glob_a the captures of the regular expression match
  # @return the first element of glob_a at the indices in i_a that is not
  #   nil, or nil if all of them are nil
  def glob(i_a, glob_a)
    i_a.each do |n|
      value = glob_a[n]
      return value unless value.nil?
    end
    nil
  end

  # Reads the times and the date out of the 26 captures of the date regex.
  # @param [MatchData] glob_a the MatchData object returned when the date
  #   regex was matched against the paragraph
  # @param [Time] date the date of the paragraph; may be nil if not known
  # @return [Array] two elements: the array of from hour, from minute, to
  #   hour and to minute, and the date (day month year) of this paragraph
  def date_from_globals(glob_a, date)
    times = [FROM_HOUR_GROUPS, FROM_MIN_GROUPS, TO_HOUR_GROUPS, TO_MIN_GROUPS]
            .map { |groups| glob(groups, glob_a) }
    [times, matched_date(glob_a) || date]
  end

  # Converts the matched day, month and year into a Time.
  # @param [MatchData] glob_a the captures of the date regex match
  # @return [Time, nil] the matched date, or nil when the match held no
  #   date or the formatter could not convert it (format_date returns nil
  #   in that case, which Time.at would not accept)
  def matched_date(glob_a)
    day = glob(DAY_GROUPS, glob_a)
    return nil if day.nil?

    month = glob(MONTH_GROUPS, glob_a)
    year = glob(YEAR_GROUPS, glob_a)
    seconds = DateFormatter.new.format_date("#{day}-#{month}-#{year}")
    seconds.nil? ? nil : Time.at(seconds)
  end

  # Returns the regular expression used to match dates and times at the
  # start of the paragraph.
  # @return [Regexp] a regular expression to use to match dates and times
  #   in the paragraph
  def date_regex
    DATE_REGEX
  end
end
169
+ end
@@ -0,0 +1,194 @@
1
+ require 'English'
2
+
3
+ # handles extraction of tagged or other significant content.
4
+ module TaggedContent
5
# Breaks paragraph text into tagged content: an array of [:tag, 'content']
# pairs. Text carrying DSL tags (e.g. 'LP: ...') is tagged accordingly; text
# without tags is tagged with the current document section.
class TagExtractor
  # Tags that are used in significant events
  SIG_EVENT = [:SEA, :INVOLVED, :WHAT, :WHY, :FEELINGS, :WELL, :DIFFERENT,
               :CHANGE, :CHANGED].freeze

  # Tags that are used in learning logs
  LEARNING_LOG = [:LP, :R, :DEN, :NOTE, :I].freeze

  # Tags that are part of the DSL and are recognized in text
  TAGS = (LEARNING_LOG + SIG_EVENT).freeze

  # Tags that can earn CPD credit
  CREDITABLE = [:LP, :R, :I, :HF, :WW, :WD].freeze

  # Tags that are applied to content based on the position in the document
  SECTIONS = [:TITLE, :INTRO, :NOTE].freeze

  # Matches any tag recognized as part of the DSL followed by a colon and an
  # optional space. Built once instead of on every call.
  TAG_REGEX = /\b(#{ TAGS.join '|' }): ?/

  # @return [Array] the tagged content collected so far, as [:tag, 'content']
  #   pairs
  attr_reader :tags

  # Declaration of the class instance variable 'section'. The section is an
  # index into SECTIONS naming the tag applied to content if no user tag is
  # detected. It changes depending on the position in the document.
  class << self
    attr_accessor :section
  end

  # Resets the class instance variable so that a new file can be parsed,
  # starting again at the first section (:TITLE).
  def self.reset
    TagExtractor.section = 0 # :TITLE
  end

  # @return [Array] all section tags followed by all DSL tags
  def self.all_tags
    SECTIONS + TAGS
  end

  def initialize
    @tags = []
  end

  # Extracts tagged content from the text and appends it to #tags. If the
  # text contains no DSL tags it is tagged with the current section instead.
  # Does nothing for nil or empty text.
  # @param [String] rest_of_str paragraph text with date info removed
  def extract_content(rest_of_str)
    return if rest_of_str.nil? || rest_of_str.empty?

    found = extract_tags(rest_of_str)
    if found.empty?
      # No tags have been extracted from the str, so use the paragraph's
      # current section
      tag_as_section(rest_of_str)
    else
      @tags += found
      # As soon as tags are extracted, there can only be note internal
      # paragraph sections
      TagExtractor.section = SECTIONS.index(:NOTE)
    end
  end

  # @param [Symbol] tag the tag to look for
  # @return [Boolean] true if any tagged content has this tag
  def tag?(tag)
    @tags.any? { |t| t[0] == tag }
  end

  # Joins all content in @tags that has the given tag; each piece of content
  # is followed by a single space.
  # @param [Symbol] tag The tag for which content should be selected.
  # @param [String] str An optional string to which the selected content is
  #   appended (the string is modified in place).
  # @return [String] str with the selected content appended
  def content(tag, str = '')
    @tags.each { |t, text| str << text << ' ' if t == tag }
    str
  end

  # true if the paragraph contains a tag that can earn credit
  def creditable?
    tags.any? { |t| CREDITABLE.include?(t[0]) }
  end

  # true if the paragraph contains a tag used in significant events
  # i.e. is the learning diary a significant event?
  def significant_event?
    tags.any? { |t| SIG_EVENT.include?(t[0]) }
  end

  # true if the paragraph contains a tag that can earn impact credit
  def impact_creditable?
    tags.any? { |t| t[0] == :I }
  end

  private

  # The current section index. The class-level value is nil until .reset has
  # been called; it is treated (and stored) as 0 (:TITLE) in that case so
  # that SECTIONS is never indexed with nil.
  # @return [Integer] index into SECTIONS
  def current_section
    TagExtractor.section ||= 0
  end

  # Extracts a paragraph string to a tagged array with elements of type
  # [:tag, 'content']. Called after the date/time info has been removed. If
  # called before, will result in date info at the start being tagged as
  # a :NOTE
  # @param [String] paragraph_text Paragraph string after date info removed
  # @return [Array] Tagged array, empty if the text contains no tags
  def extract_tags(paragraph_text)
    tag_regex =~ paragraph_text ? extract_tag(paragraph_text) : []
  end

  # Adds a single tagged content element of type
  # [:symbol (tag), String (content)] to the @tags instance variable. Moves
  # the section class instance variable up one (to :INTRO) if it is at
  # position 0 (:TITLE).
  # @param [Symbol] tag the tag to use
  # @param [String] p the content to tag
  def tag_it(tag, p)
    @tags << [tag, p]
    TagExtractor.section += 1 if current_section == 0 && SECTIONS.count > 1
  end

  # @return [Regexp] the regex that matches the tags recognized as part of
  #   the DSL
  def tag_regex
    TAG_REGEX
  end

  # Tags the text found before the first tag of a paragraph.
  # @param [Array] a The paragraph split on the tag regex; element 0 is the
  #   text before the first tag (an empty string if there was none)
  # @return [Array] Either empty, or a single element [:NOTE, text] when
  #   there was text at the beginning
  def preface_with_note(a)
    str = a[0].to_s.strip
    str.empty? ? [] : [[:NOTE, str]]
  end

  # Turns an array of strings into a tagged array. Ignores the string at
  # position [0], which is handled by #preface_with_note. The remaining
  # elements alternate tag, content, tag, content...; a trailing tag without
  # content is dropped.
  # @param [Array] a Array of strings from splitting the paragraph
  # @return [Array] A tagged array with elements of the form
  #   [:tag, 'content']
  def tags_array(a)
    a.drop(1).each_slice(2)
     .select { |pair| pair.size == 2 }
     .map { |tag, text| [tag.to_sym, text.strip] }
  end

  # Splits the paragraph into an array of tags of type [:tag, 'content']
  # @param [String] paragraph_text String text of the paragraph with any
  #   date and time info at the beginning, removed
  # @return [Array] Tagged array of content with elements of type
  #   [:tag, 'content']
  def extract_tag(paragraph_text)
    a = paragraph_text.split(tag_regex)
    preface_with_note(a) + tags_array(a)
  end

  # Adds the text to @tags, tagged with the section tag the class-level
  # section index currently refers to.
  # @param [String] str the content to tag
  def tag_as_section(str)
    tag_it(section_tag, str)
  end

  # @return [Symbol] the tag of the current section
  def section_tag
    SECTIONS[current_section]
  end
end
194
+ end
@@ -0,0 +1,74 @@
1
+ module Times
2
# Processes new times against the current start time, end time and date,
# which are held in class instance variables (st, et and date).
class TimeProcesser
  # Length of a day in seconds.
  SECONDS_PER_DAY = 86_400

  # Class instance variables: the current start time, end time and date.
  class << self
    attr_accessor :st, :et, :date
  end

  # Resets the class instance variables (date, st and et) to nil so that a
  # new file can be parsed.
  def self.reset
    TimeProcesser.date = TimeProcesser.st = TimeProcesser.et = nil
  end

  # Updates the class-level start and end times from newly extracted times.
  # The date is advanced by a day if the end time has crossed midnight.
  # Components that are nil are skipped, so an empty array changes nothing.
  # @param [Array] new_times An array containing the from hour, from min,
  #   to hour, to min
  def process_times(new_times)
    f_hour, f_min, t_hour, t_min = new_times
    to_start_time(f_hour, f_min) if has f_hour
    to_end_time(t_hour, t_min) if has t_hour
  end

  private

  # Sets the end time to the current date at t_hour and t_min. If the end
  # time is before the start time, assumes the end time is for the next day
  # and adds a day (which also advances the current date). When no start
  # time is known there is nothing to compare with and the end time is left
  # as computed.
  # @param [Number] t_hour To hours
  # @param [Number] t_min To minutes
  def to_end_time(t_hour, t_min)
    TimeProcesser.et = extract_time_object(t_hour, t_min, TimeProcesser.date)
    start = TimeProcesser.st
    return if start.nil? || TimeProcesser.et >= start

    # end time before start time: assume it is the following day
    TimeProcesser.et += a_day
  end

  # Sets the start time to the current date at hour and min. The end time
  # and the date are left untouched.
  # @param [Number] hour From hours
  # @param [Number] min From minutes
  def to_start_time(hour, min)
    TimeProcesser.st = extract_time_object(hour, min, TimeProcesser.date)
  end

  # Improves readability of boolean condition statements.
  # @param [Object] time_date_component Any object
  # @return [Boolean] true if the parameter is not nil
  def has(time_date_component)
    !time_date_component.nil?
  end

  # Adds a day to the class-level date and returns one day in seconds.
  # @raise [RuntimeError] if no date has been set
  # @return [Integer] the number of seconds in a day
  def a_day
    fail('needs date') if TimeProcesser.date.nil?

    TimeProcesser.date += SECONDS_PER_DAY
    SECONDS_PER_DAY
  end

  # Adds a given number of hours and minutes to a Time object.
  # @param [Number] hour hours to add to from
  # @param [Number] min minutes to add to from
  # @param [Time] from time to which hours and minutes are added; nil is
  #   converted with to_i and therefore counts as 0 seconds
  # @return [Time] the result of hours and minutes after from
  def extract_time_object(hour, min, from)
    seconds = (hour.to_i * 3600) + (min.to_i * 60)
    Time.at(from.to_i + seconds)
  end
end
74
+ end