atomic_assessments_import 0.2.4 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +21 -1
- data/docs/plans/2026-02-11-flexible-examsoft-importer-design.md +127 -0
- data/docs/plans/2026-02-11-flexible-examsoft-importer-plan.md +2635 -0
- data/lib/atomic_assessments_import/csv/converter.rb +3 -3
- data/lib/atomic_assessments_import/exam_soft/chunker/heading_split_strategy.rb +38 -0
- data/lib/atomic_assessments_import/exam_soft/chunker/horizontal_rule_split_strategy.rb +37 -0
- data/lib/atomic_assessments_import/exam_soft/chunker/metadata_marker_strategy.rb +38 -0
- data/lib/atomic_assessments_import/exam_soft/chunker/numbered_question_strategy.rb +41 -0
- data/lib/atomic_assessments_import/exam_soft/chunker/strategy.rb +22 -0
- data/lib/atomic_assessments_import/exam_soft/chunker.rb +46 -0
- data/lib/atomic_assessments_import/exam_soft/converter.rb +203 -0
- data/lib/atomic_assessments_import/exam_soft/extractor/correct_answer_detector.rb +36 -0
- data/lib/atomic_assessments_import/exam_soft/extractor/feedback_detector.rb +50 -0
- data/lib/atomic_assessments_import/exam_soft/extractor/metadata_detector.rb +37 -0
- data/lib/atomic_assessments_import/exam_soft/extractor/options_detector.rb +44 -0
- data/lib/atomic_assessments_import/exam_soft/extractor/question_stem_detector.rb +44 -0
- data/lib/atomic_assessments_import/exam_soft/extractor/question_type_detector.rb +51 -0
- data/lib/atomic_assessments_import/exam_soft/extractor.rb +96 -0
- data/lib/atomic_assessments_import/exam_soft.rb +10 -0
- data/lib/atomic_assessments_import/questions/cloze_dropdown.rb +62 -0
- data/lib/atomic_assessments_import/questions/essay.rb +20 -0
- data/lib/atomic_assessments_import/questions/fill_in_the_blank.rb +49 -0
- data/lib/atomic_assessments_import/questions/matching.rb +42 -0
- data/lib/atomic_assessments_import/questions/multiple_choice.rb +102 -0
- data/lib/atomic_assessments_import/questions/ordering.rb +53 -0
- data/lib/atomic_assessments_import/questions/question.rb +106 -0
- data/lib/atomic_assessments_import/questions/short_answer.rb +24 -0
- data/lib/atomic_assessments_import/utils.rb +21 -0
- data/lib/atomic_assessments_import/version.rb +1 -1
- data/lib/atomic_assessments_import/writer.rb +1 -1
- data/lib/atomic_assessments_import.rb +31 -12
- metadata +62 -13
- data/lib/atomic_assessments_import/csv/questions/multiple_choice.rb +0 -104
- data/lib/atomic_assessments_import/csv/questions/question.rb +0 -86
- data/lib/atomic_assessments_import/csv/utils.rb +0 -24
|
@@ -3,9 +3,9 @@
|
|
|
3
3
|
require "csv"
|
|
4
4
|
require "active_support/core_ext/digest/uuid"
|
|
5
5
|
|
|
6
|
-
require_relative "questions/question"
|
|
7
|
-
require_relative "questions/multiple_choice"
|
|
8
|
-
require_relative "utils"
|
|
6
|
+
require_relative "../questions/question"
|
|
7
|
+
require_relative "../questions/multiple_choice"
|
|
8
|
+
require_relative "../utils"
|
|
9
9
|
|
|
10
10
|
module AtomicAssessmentsImport
|
|
11
11
|
module CSV
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "strategy"
|
|
4
|
+
|
|
5
|
+
module AtomicAssessmentsImport
  module ExamSoft
    module Chunker
      # Splits a document into question chunks at heading elements (h1-h6).
      #
      # Each heading opens a new chunk and every following node joins it.
      # Nodes seen before the first heading are kept as header nodes.
      class HeadingSplitStrategy < Strategy
        HEADING_PATTERN = /^h[1-6]$/i

        # @param doc [#children] parsed document; each child responds to #name and #text
        # @return [Array<Array>] one array of nodes per chunk, or [] when fewer
        #   than two chunks were found
        def split(doc)
          @header_nodes = []
          groups = []

          doc.children.each do |node|
            # Blank nodes are dropped unless the node itself is an img/table/hr.
            # NOTE(review): a wrapper element that only contains an image has
            # empty text and is dropped here too - confirm that is intended.
            blank = node.text.strip.empty?
            next if blank && !node.name.match?(/^(img|table|hr)$/i)

            if node.name.match?(HEADING_PATTERN)
              groups << [node]
            elsif groups.empty?
              @header_nodes << node
            else
              groups.last << node
            end
          end

          groups.length < 2 ? [] : groups
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "strategy"
|
|
4
|
+
|
|
5
|
+
module AtomicAssessmentsImport
  module ExamSoft
    module Chunker
      # Splits a document into question chunks at <hr> elements.
      #
      # The <hr> nodes themselves are discarded. When three or more segments
      # are found, the first one is treated as the exam header rather than as
      # a question.
      class HorizontalRuleSplitStrategy < Strategy
        # @param doc [#children] parsed document; each child responds to #name and #text
        # @return [Array<Array>] one array of nodes per chunk, or [] when fewer
        #   than two question segments remain
        def split(doc)
          @header_nodes = []
          segments = [[]]

          doc.children.each do |node|
            if node.name.match?(/^hr$/i)
              # A rule closes the running segment and opens the next one.
              segments << []
              next
            end

            # Blank nodes are dropped unless the node itself is an img/table.
            next if node.text.strip.empty? && !node.name.match?(/^(img|table)$/i)

            segments.last << node
          end

          # Consecutive or leading/trailing rules leave empty segments behind.
          segments = segments.reject(&:empty?)

          @header_nodes = segments.shift if segments.length >= 3

          segments.length < 2 ? [] : segments
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "strategy"
|
|
4
|
+
|
|
5
|
+
module AtomicAssessmentsImport
  module ExamSoft
    module Chunker
      # Splits a document into question chunks at nodes whose text starts with
      # a "Type:" or "Folder:" metadata marker (case-insensitive).
      #
      # Nodes seen before the first marker are kept as header nodes.
      class MetadataMarkerStrategy < Strategy
        MARKER_PATTERN = /\A\s*(?:Type:|Folder:)\s*/i

        # @param doc [#children] parsed document; each child responds to #name and #text
        # @return [Array<Array>] one array of nodes per chunk; empty when no
        #   marker was found
        def split(doc)
          @header_nodes = []
          groups = []

          doc.children.each do |node|
            content = node.text.strip
            # Blank nodes are dropped unless the node itself is an img/table/hr.
            next if content.empty? && !node.name.match?(/^(img|table|hr)$/i)

            if content.match?(MARKER_PATTERN)
              groups << [node]
            elsif groups.empty?
              @header_nodes << node
            else
              groups.last << node
            end
          end

          groups
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "strategy"
|
|
4
|
+
|
|
5
|
+
module AtomicAssessmentsImport
  module ExamSoft
    module Chunker
      # Splits a document into question chunks at nodes whose text starts with
      # a question number.
      #
      # Nodes seen before the first numbered node are kept as header nodes.
      class NumberedQuestionStrategy < Strategy
        # Matches a leading integer followed by "." or ")" - e.g. "1)", "1."
        # or "12)" - optionally preceded by "Question" and a single ":" or "#"
        # (e.g. "Question 1)" or "Question #1.").
        # Digits are required, so lettered prefixes such as "a)" (used for
        # answer options) never match. The trailing "." or ")" is required as
        # well, so a bare "1" does not match.
        NUMBERED_PATTERN = /\A\s*(?:Question\s*[:#]?\s*)?(\d+)\s*[.)]/

        # @param doc [#children] parsed document; each child responds to #name and #text
        # @return [Array<Array>] one array of nodes per chunk; empty when no
        #   numbered node was found
        def split(doc)
          @header_nodes = []
          groups = []

          doc.children.each do |node|
            content = node.text.strip
            # Blank nodes are dropped unless the node itself is an img/table/hr.
            next if content.empty? && !node.name.match?(/^(img|table|hr)$/i)

            if content.match?(NUMBERED_PATTERN)
              groups << [node]
            elsif groups.empty?
              @header_nodes << node
            else
              groups.last << node
            end
          end

          groups
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module AtomicAssessmentsImport
  module ExamSoft
    module Chunker
      # Abstract base class for document chunking strategies.
      #
      # A strategy inspects a parsed document and decides where one question
      # ends and the next begins. Nodes that precede the first question are
      # exposed through #header_nodes.
      class Strategy
        # @return [Array] nodes found before the first question chunk
        attr_reader :header_nodes

        def initialize
          @header_nodes = []
        end

        # Subclasses implement this. Returns an array of chunks,
        # where each chunk is an array of Nokogiri nodes belonging to one question.
        # Returns empty array if this strategy doesn't apply to the document.
        #
        # @param doc [Object] the parsed document to split
        # @raise [NotImplementedError] when the subclass does not override it
        def split(doc)
          # Name the offending class so a missing override is easy to locate.
          raise NotImplementedError, "#{self.class.name} must implement #split"
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "chunker/strategy"
|
|
4
|
+
require_relative "chunker/metadata_marker_strategy"
|
|
5
|
+
require_relative "chunker/numbered_question_strategy"
|
|
6
|
+
require_relative "chunker/heading_split_strategy"
|
|
7
|
+
require_relative "chunker/horizontal_rule_split_strategy"
|
|
8
|
+
|
|
9
|
+
module AtomicAssessmentsImport
  module ExamSoft
    # Splits a parsed document into per-question chunks by trying each
    # strategy in STRATEGIES in order and using the first one that yields
    # any chunks.
    module Chunker
      STRATEGIES = [
        MetadataMarkerStrategy,
        NumberedQuestionStrategy,
        HeadingSplitStrategy,
        HorizontalRuleSplitStrategy,
      ].freeze

      # @param doc [#children] parsed document
      # @return [Hash] with keys :chunks (array of node arrays),
      #   :header_nodes (nodes before the first chunk) and :warnings (strings)
      def self.chunk(doc)
        STRATEGIES.each do |klass|
          splitter = klass.new
          pieces = splitter.split(doc)

          unless pieces.empty?
            return {
              chunks: pieces,
              header_nodes: splitter.header_nodes,
              warnings: [],
            }
          end
        end

        # No strategy matched — return entire document as one chunk,
        # keeping non-blank nodes plus any img/table/hr element.
        kept = doc.children.select do |n|
          !n.text.strip.empty? || n.name.match?(/^(img|table|hr)$/i)
        end

        {
          chunks: [kept],
          header_nodes: [],
          warnings: ["No chunking strategy matched. Treating entire document as a single question."],
        }
      end
    end
  end
end
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pandoc-ruby"
|
|
4
|
+
require "nokogiri"
|
|
5
|
+
require "active_support/core_ext/digest/uuid"
|
|
6
|
+
|
|
7
|
+
require_relative "../questions/question"
|
|
8
|
+
require_relative "../questions/multiple_choice"
|
|
9
|
+
require_relative "../questions/essay"
|
|
10
|
+
require_relative "../questions/short_answer"
|
|
11
|
+
require_relative "../questions/fill_in_the_blank"
|
|
12
|
+
require_relative "../questions/matching"
|
|
13
|
+
require_relative "../questions/ordering"
|
|
14
|
+
require_relative "../utils"
|
|
15
|
+
require_relative "chunker"
|
|
16
|
+
require_relative "extractor"
|
|
17
|
+
|
|
18
|
+
module AtomicAssessmentsImport
|
|
19
|
+
module ExamSoft
|
|
20
|
+
class Converter
|
|
21
|
+
def initialize(file)
|
|
22
|
+
@file = file
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def convert
|
|
26
|
+
html = normalize_to_html
|
|
27
|
+
doc = Nokogiri::HTML.fragment(html)
|
|
28
|
+
normalize_html_structure(doc)
|
|
29
|
+
|
|
30
|
+
# Chunk the document
|
|
31
|
+
chunk_result = Chunker.chunk(doc)
|
|
32
|
+
all_warnings = chunk_result[:warnings].map { |w| build_warning(w) }
|
|
33
|
+
|
|
34
|
+
if chunk_result[:chunks].length == 1
|
|
35
|
+
all_warnings << build_warning("Only 1 chunk detected — document may not be in a recognized format")
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
# Log header info if present
|
|
39
|
+
unless chunk_result[:header_nodes].empty?
|
|
40
|
+
header_text = chunk_result[:header_nodes].map { |n| n.text.strip }.join(" ")
|
|
41
|
+
all_warnings << build_warning("Exam header detected: #{header_text}") unless header_text.empty?
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
items = []
|
|
45
|
+
questions = []
|
|
46
|
+
|
|
47
|
+
chunk_result[:chunks].each_with_index do |chunk_nodes, index|
|
|
48
|
+
# Extract fields from this chunk
|
|
49
|
+
extraction = Extractor.extract(chunk_nodes)
|
|
50
|
+
extraction[:warnings].each do |w|
|
|
51
|
+
all_warnings << build_warning("Question #{index + 1}: #{w}", index: index, question_type: extraction[:row]["question type"])
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
row = extraction[:row]
|
|
55
|
+
status = extraction[:status]
|
|
56
|
+
|
|
57
|
+
# Skip completely unparseable chunks
|
|
58
|
+
if row["question text"].nil? && row["option a"].nil?
|
|
59
|
+
all_warnings << build_warning("Question #{index + 1}: Skipped — no usable content found", index: index)
|
|
60
|
+
next
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
next unless status == "published"
|
|
64
|
+
|
|
65
|
+
begin
|
|
66
|
+
item, question_widgets = convert_row(row, "published")
|
|
67
|
+
items << item
|
|
68
|
+
questions += question_widgets
|
|
69
|
+
rescue StandardError => e
|
|
70
|
+
title = row["title"] || "Question #{index + 1}"
|
|
71
|
+
all_warnings << build_warning("#{title}: #{e.message}", index: index, question_type: row["question type"])
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
{
|
|
76
|
+
activities: [],
|
|
77
|
+
items: items,
|
|
78
|
+
questions: questions,
|
|
79
|
+
features: [],
|
|
80
|
+
errors: all_warnings,
|
|
81
|
+
}
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
private
|
|
85
|
+
|
|
86
|
+
def build_warning(message, index: nil, question_type: nil)
|
|
87
|
+
{
|
|
88
|
+
error_type: "warning",
|
|
89
|
+
question_type: question_type,
|
|
90
|
+
message: message,
|
|
91
|
+
qti_item_id: nil,
|
|
92
|
+
index: index,
|
|
93
|
+
}
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
def normalize_html_structure(doc)
|
|
97
|
+
doc.css("p").each do |p_node|
|
|
98
|
+
br_children = p_node.css("br")
|
|
99
|
+
next if br_children.empty?
|
|
100
|
+
|
|
101
|
+
# Split the <p> at each <br> into separate <p> elements
|
|
102
|
+
segments = []
|
|
103
|
+
current_segment = []
|
|
104
|
+
|
|
105
|
+
p_node.children.each do |child|
|
|
106
|
+
if child.name == "br"
|
|
107
|
+
segments << current_segment unless current_segment.empty?
|
|
108
|
+
current_segment = []
|
|
109
|
+
else
|
|
110
|
+
current_segment << child
|
|
111
|
+
end
|
|
112
|
+
end
|
|
113
|
+
segments << current_segment unless current_segment.empty?
|
|
114
|
+
|
|
115
|
+
next if segments.length <= 1
|
|
116
|
+
|
|
117
|
+
# Replace original <p> with multiple <p> elements
|
|
118
|
+
segments.reverse_each do |segment|
|
|
119
|
+
new_p = Nokogiri::XML::Node.new("p", doc)
|
|
120
|
+
segment.each { |child| new_p.add_child(child.clone) }
|
|
121
|
+
p_node.add_next_sibling(new_p)
|
|
122
|
+
end
|
|
123
|
+
p_node.remove
|
|
124
|
+
end
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
def normalize_to_html
|
|
128
|
+
# Note: Pandoc Ruby takes either a file path or a string of content, but not a File object directly, so we have to handle both cases here
|
|
129
|
+
if @file.is_a?(String)
|
|
130
|
+
# File path as string
|
|
131
|
+
PandocRuby.new([@file], from: @file.split(".").last).to_html
|
|
132
|
+
elsif @file.respond_to?(:path) && @file.respond_to?(:read)
|
|
133
|
+
# File-like object (File, Tempfile, etc.)
|
|
134
|
+
source_type = @file.path.split(".").last.match(/^[a-zA-Z]+/)[0]
|
|
135
|
+
PandocRuby.new(@file.read, from: source_type).to_html
|
|
136
|
+
else
|
|
137
|
+
raise ArgumentError, "Expected a file path (String) or file-like object, got #{@file.class}"
|
|
138
|
+
end
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
def categories_to_tags(categories)
|
|
142
|
+
tags = {}
|
|
143
|
+
(categories || []).each do |cat|
|
|
144
|
+
parts = cat.to_s.split("/")
|
|
145
|
+
key = parts.shift&.strip
|
|
146
|
+
value = parts.join("/").strip
|
|
147
|
+
next if key.blank? || value.blank?
|
|
148
|
+
|
|
149
|
+
key = key.delete(":")[0, 255]
|
|
150
|
+
value = value[0, 255]
|
|
151
|
+
next if key.blank? || value.blank?
|
|
152
|
+
|
|
153
|
+
tags[key.to_sym] ||= []
|
|
154
|
+
tags[key.to_sym] |= [value]
|
|
155
|
+
end
|
|
156
|
+
tags
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
def convert_row(row, status = "published")
|
|
160
|
+
source = "<p>ExamSoft Import on #{Time.now.strftime('%Y-%m-%d')}</p>\n"
|
|
161
|
+
source += "<p>External id: #{row['question id']}</p>\n" if row["question id"].present?
|
|
162
|
+
|
|
163
|
+
question = Questions::Question.load(row)
|
|
164
|
+
# ExamSoft has a dedicated Multiple Answer question type, but Learnosity does not, so we need to update the question type and UI style for those questions
|
|
165
|
+
question_learnosity = question.to_learnosity
|
|
166
|
+
if row["question type"] == "ma"
|
|
167
|
+
question_learnosity[:data][:ui_style] = { choice_label: "upper-alpha", type: "block" }
|
|
168
|
+
question_learnosity[:data][:multiple_responses] = true
|
|
169
|
+
end
|
|
170
|
+
|
|
171
|
+
item = {
|
|
172
|
+
reference: SecureRandom.uuid,
|
|
173
|
+
title: row["title"] || "",
|
|
174
|
+
status: status,
|
|
175
|
+
tags: categories_to_tags(row["category"]),
|
|
176
|
+
metadata: {
|
|
177
|
+
import_date: Time.now.iso8601,
|
|
178
|
+
import_type: row["import_type"] || "examsoft",
|
|
179
|
+
},
|
|
180
|
+
source: source,
|
|
181
|
+
description: row["description"] || "",
|
|
182
|
+
questions: [
|
|
183
|
+
{
|
|
184
|
+
reference: question.reference,
|
|
185
|
+
type: question.question_type,
|
|
186
|
+
},
|
|
187
|
+
],
|
|
188
|
+
features: [],
|
|
189
|
+
definition: {
|
|
190
|
+
widgets: [
|
|
191
|
+
{
|
|
192
|
+
reference: question.reference,
|
|
193
|
+
widget_type: "response",
|
|
194
|
+
},
|
|
195
|
+
],
|
|
196
|
+
},
|
|
197
|
+
}
|
|
198
|
+
[item, [question_learnosity]]
|
|
199
|
+
end
|
|
200
|
+
|
|
201
|
+
end
|
|
202
|
+
end
|
|
203
|
+
end
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module AtomicAssessmentsImport
  module ExamSoft
    module Extractor
      # Works out which answers are correct for one question chunk.
      #
      # Options already flagged as correct win; otherwise the first node whose
      # text starts with "Answer:" supplies the answer tokens.
      class CorrectAnswerDetector
        ANSWER_LABEL_PATTERN = /\AAnswer:\s*(.+)/i

        # @param nodes [Array<#text>] nodes of the question chunk
        # @param options [Array<Hash>] option hashes with :letter and :correct keys
        def initialize(nodes, options)
          @nodes = nodes
          @options = options
        end

        # @return [Array<String>] letters of the flagged options, or the
        #   lower-cased tokens listed after an "Answer:" label; [] when neither exists
        def detect
          # Flagged options take precedence over any "Answer:" label.
          flagged = @options.each_with_object([]) do |option, letters|
            letters << option[:letter] if option[:correct]
          end
          return flagged unless flagged.empty?

          @nodes.each do |node|
            label = ANSWER_LABEL_PATTERN.match(node.text.strip)
            next if label.nil?

            # Tokens may be separated by whitespace, commas or semicolons.
            tokens = label[1].strip.split(/[\s,;]+/)
            tokens = tokens.map(&:strip).reject(&:empty?).map(&:downcase)
            return tokens unless tokens.empty?
          end

          []
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module AtomicAssessmentsImport
  module ExamSoft
    module Extractor
      # Extracts the explanation/feedback text of one question chunk.
      #
      # Feedback is either everything after a "~" marker (continuing over the
      # following nodes until an answer option starts), or the text after an
      # "Explanation:" / "Rationale:" label.
      class FeedbackDetector
        # The capture may be empty: a node can end with a bare "~" while the
        # explanation itself lives in the following nodes.
        TILDE_PATTERN = /~\s*(.*)/m
        LABEL_PATTERN = /\A\s*(?:Explanation|Rationale):\s*(.+)/im
        OPTION_PATTERN = /\A\s*\*?[a-oA-O]\s*[.)]/

        # @param nodes [Array<#text>] nodes of the question chunk
        def initialize(nodes)
          @nodes = nodes
        end

        # @return [String, nil] whitespace-normalized feedback, or nil when the
        #   chunk contains none
        def detect
          feedback_parts = []
          collecting = false

          @nodes.each do |node|
            text = node.text.strip

            if collecting
              # Stop collecting if we hit an option line
              break if text.match?(OPTION_PATTERN)

              feedback_parts << text unless text.empty?
              next
            end

            match = text.match(TILDE_PATTERN)
            next unless match

            first_part = match[1].strip
            feedback_parts << first_part unless first_part.empty?
            collecting = true
          end

          return feedback_parts.join(" ").gsub(/\s+/, " ").strip unless feedback_parts.empty?

          # No tilde feedback: fall back to a labelled explanation.
          @nodes.each do |node|
            match = node.text.strip.match(LABEL_PATTERN)
            return match[1].gsub(/\s+/, " ").strip if match
          end

          nil
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module AtomicAssessmentsImport
  module ExamSoft
    module Extractor
      # Pulls the "Type:", "Folder:", "Title:" and "Category:" metadata out of
      # the combined text of a question chunk.
      class MetadataDetector
        FOLDER_PATTERN = /Folder:\s*(.+?)(?=\s*(?:Title:|Category:|(?<=\s)\d+[.)]))/i
        TITLE_PATTERN = /Title:\s*(.+?)(?=\s*(?:Category:|(?<=\s)\d+[.)]))/i
        CATEGORY_PATTERN = /Category:\s*(.+?)(?=\s*\d+[.)]|\z)/i
        TYPE_PATTERN = /Type:\s*(\S+)/i

        # @param nodes [Array<#text>] nodes of the question chunk
        def initialize(nodes)
          @nodes = nodes
        end

        # @return [Hash] any of :type (lower-cased), :folder, :title and
        #   :categories (array of strings); keys are present only when found
        def detect
          # Join all node text and collapse whitespace so every pattern sees
          # one single-line string.
          combined = @nodes.map { |n| n.text.strip }.join(" ").gsub(/\s+/, " ")

          # Categories are split on commas that have no whitespace on either side.
          categories = combined[CATEGORY_PATTERN, 1]&.split(/(?<!\s),(?!\s)/)&.map(&:strip)

          {
            type: combined[TYPE_PATTERN, 1]&.strip&.downcase,
            folder: combined[FOLDER_PATTERN, 1]&.strip,
            title: combined[TITLE_PATTERN, 1]&.strip,
            categories: categories,
          }.compact
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module AtomicAssessmentsImport
  module ExamSoft
    module Extractor
      # Finds the answer options ("a) ...", "B. ...", "*c) ...") in a question chunk.
      class OptionsDetector
        OPTION_PATTERN = /\A\s*(\*?)([a-oA-O])\s*[.)]\s*(.+)/m

        # @param nodes [Array<#text, #css>] nodes of the question chunk
        def initialize(nodes)
          @nodes = nodes
        end

        # @return [Array<Hash>] one hash per option with :text, :letter
        #   (lower-cased) and :correct; an option counts as correct when it is
        #   prefixed with "*" or rendered entirely in bold
        def detect
          @nodes.each_with_object([]) do |node, found|
            parsed = OPTION_PATTERN.match(node.text.strip)
            next unless parsed

            star, label, body = parsed.captures
            found << {
              text: body.strip,
              letter: label.downcase,
              correct: star == "*" || bold_node?(node),
            }
          end
        end

        private

        # True when some <strong>/<b> descendant carries the node's entire
        # (stripped) text, i.e. the whole option is bold.
        def bold_node?(node)
          whole_text = node.text.strip
          node.css("strong, b").any? { |bold_el| bold_el.text.strip == whole_text }
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module AtomicAssessmentsImport
  module ExamSoft
    module Extractor
      # Extracts the question stem (the question text itself) from a chunk.
      #
      # The stem is taken from the first non-blank node that is not an answer
      # option, with leading numbering / metadata labels and any "~"
      # explanation removed.
      class QuestionStemDetector
        OPTION_PATTERN = /\A\s*\*?[a-oA-O]\s*[.)]/
        # Text that opens with a metadata label.
        METADATA_START = /\A\s*(?:Folder|Title|Category|Type):/i
        # A question number at the very start, e.g. "1) ", "12. ", "Question 3) ".
        # The (?!\d) guard keeps decimals such as "2.5 mg" intact.
        LEADING_NUMBER = /\A\s*(?:Question\s*[:#]?\s*)?\d+[.)](?!\d)\s*/i
        # Everything up to and including the first standalone number marker;
        # only applied when the text opens with metadata labels.
        NUMBER_AFTER_METADATA = /\A.*?(?<!\S)\d+[.)]\s*/m
        # Leading "Label: value" pairs with single-token values.
        METADATA_LABELS = /\A\s*(?:(?:Folder|Title|Category|Type):\s*\S+\s*)*/i

        # @param nodes [Array<#text>] nodes of the question chunk
        def initialize(nodes)
          @nodes = nodes
        end

        # @return [String, nil] the whitespace-normalized stem, or nil when
        #   the chunk has no usable stem
        def detect
          stem_node = @nodes.find do |node|
            candidate = node.text.strip
            !candidate.empty? && !candidate.match?(OPTION_PATTERN)
          end
          return nil unless stem_node

          text = strip_leading_markers(stem_node.text.strip)

          # Split on tilde and take the first part (remove explanation)
          text = text.split("~").first

          text = text&.gsub(/\s+/, " ")&.strip
          text.nil? || text.empty? ? nil : text
        end

        private

        # Removes metadata labels and the question number from the start of
        # the text. Numbers in the middle of an unlabelled stem ("2.5 mg",
        # "(Figure 2)") are left alone, so they can no longer truncate it.
        def strip_leading_markers(text)
          return text.sub(LEADING_NUMBER, "") unless text.match?(METADATA_START)

          # e.g. "Folder: Geo Title: Q1 Category: Test 1) What is the capital?"
          # Metadata values are free text, so the question starts after the
          # first standalone number marker when there is one.
          if text.match?(NUMBER_AFTER_METADATA)
            text.sub(NUMBER_AFTER_METADATA, "")
          else
            text.sub(METADATA_LABELS, "")
          end
        end
      end
    end
  end
end
|