docbook_status 0.1.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,143 +1,64 @@
1
- # -*- encoding: utf-8 -*-
1
+ # -*- encoding:utf-8 -*-
2
+ module DocbookStatus
2
3
 
3
- require 'xml'
4
+ # :stopdoc:
5
+ LIBPATH = ::File.expand_path(::File.dirname(__FILE__)) + ::File::SEPARATOR
6
+ PATH = ::File.dirname(LIBPATH) + ::File::SEPARATOR
7
+ # :startdoc:
4
8
 
5
- # Analyzes DocBook 5 documents for document structure (sections) and text length.
6
- #
7
- class DocbookStatus
8
-
9
- # :stopdoc
10
- #
11
- PATH = File.expand_path('../..', __FILE__) + File::SEPARATOR
12
- LIBPATH = File.expand_path('..', __FILE__) + File::SEPARATOR
13
- VERSION = File.read(PATH + '/version.txt').strip
14
- HOME = File.expand_path(ENV['HOME'] || ENV['USERPROFILE'])
15
- #
16
- # :startdoc
17
-
18
- # The DocBook 5 namespace URL
19
- DOCBOOK_NS = 'http://docbook.org/ns/docbook'
20
- # The XInclude namespace URL
21
- XINCLUDE_NS = 'http://www.w3.org/2001/XInclude'
22
-
23
- # Elements whose contents is counted as text
24
- @@text_elements = ['para','simpara','formalpara']
25
-
26
- # Section elements, following the list given in http://docbook.org/tdg5/en/html/ch02.html#roots
27
- # except for the refsect... elements.
28
- @@section_elements = %w[
29
- acknowledgements appendix article
30
- bibliography book
31
- chapter colophon
32
- dedication
33
- glossary
34
- index
35
- preface
36
- section sect1 sect2 sect3 sect4 set simplesect
37
- toc
38
- ]
39
-
40
- def initialize
41
- @sections = []
42
- end
43
-
44
- # Returns the version of docbook_status
9
+ # Returns the version string for the library.
45
10
  #
46
11
  def self.version
47
- VERSION
48
- end
49
-
50
- # Counts the words in the contents of the given node. _Word_ in this
51
- # context means something that is delimited by _space_ charactes and starts with
52
- # _word_ characters (in the regexp sense).
53
- #
54
- def count_words(node)
55
- words = node.content.strip.split(/[[:space:]]+/).find_all {|w| w =~ /\w+/}
56
- words.size
12
+ @version ||= File.read(path('version.txt')).strip
57
13
  end
58
14
 
59
- # Find the _title_ of the current section. That element is either
60
- # directly following or inside an _info_ element. Return the empty
61
- # string if no title can be found.
15
+ # Returns the library path for the module. If any arguments are given,
16
+ # they will be joined to the end of the library path using
17
+ # <tt>File.join</tt>.
62
18
  #
63
- def find_section_title(node)
64
- title = node.find_first('./db:title')
65
- if title.nil?
66
- title = node.find_first './db:info/db:title'
67
- end
68
- if title.nil?
69
- ""
70
- else
71
- title.content
19
+ def self.libpath( *args, &block )
20
+ rv = args.empty? ? LIBPATH : ::File.join(LIBPATH, args.flatten)
21
+ if block
22
+ begin
23
+ $LOAD_PATH.unshift LIBPATH
24
+ rv = block.call
25
+ ensure
26
+ $LOAD_PATH.shift
27
+ end
72
28
  end
29
+ return rv
73
30
  end
74
31
 
75
- # Check the document elements for content and type recursively,
76
- # starting at the current node. Returns an array with paragraph and
77
- # section maps.
32
+ # Returns the path for the module. If any arguments are given,
33
+ # they will be joined to the end of the path using
34
+ # <tt>File.join</tt>.
78
35
  #
79
- def check_node(node, level, ctr)
80
- if (@@text_elements.include? node.name)
81
- ctr << {:type => :para, :level => level, :words => count_words(node)}
82
- elsif (@@section_elements.include? node.name)
83
- title = find_section_title(node)
84
- ctr << {:type => :section, :level => level, :title => title, :name => node.name}
85
- end
86
- node.children.each {|inner_elem| check_node(inner_elem, level+1, ctr)} if node.children?
87
- ctr
88
- end
89
-
90
- # Check whether the document has a DocBook default namespace
91
- def is_docbook?(doc)
92
- dbns = doc.root.namespaces.default
93
- (!dbns.nil? && (dbns.href.casecmp(DOCBOOK_NS) == 0))
94
- end
95
-
96
- # Check whether the document has a XInclude namespace
97
- def has_xinclude?(doc)
98
- ret = false
99
- doc.root.namespaces.each do |ns|
100
- if (ns.href.casecmp(XINCLUDE_NS) == 0)
101
- ret = true
102
- break
36
+ def self.path( *args, &block )
37
+ rv = args.empty? ? PATH : ::File.join(PATH, args.flatten)
38
+ if block
39
+ begin
40
+ $LOAD_PATH.unshift PATH
41
+ rv = block.call
42
+ ensure
43
+ $LOAD_PATH.shift
103
44
  end
104
45
  end
105
- ret
46
+ return rv
106
47
  end
107
48
 
108
- # Searches the XML document for sections and word counts. Returns an
109
- # array of sections with their word counts.
49
+ # Utility method used to require all files ending in .rb that lie in the
50
+ # directory below this file that has the same name as the filename passed
51
+ # in. Optionally, a specific _directory_ name can be passed in such that
52
+ # the _filename_ does not have to be equivalent to the directory.
110
53
  #
111
- def analyze_document(doc)
112
- # Add a namespace declaration for XPath expressions
113
- doc.root.namespaces.default_prefix = 'db'
114
- # Analyze the document starting with the root node
115
- doc_maps = check_node(doc.root,0,[])
116
- @sections = []
117
- section_name = doc_maps[0][:title]
118
- section_type = doc_maps[0][:name]
119
- section_ctr = 0
120
- section_level = 0
121
- doc_ctr = 0
122
- #puts doc_maps.inspect
123
- xms = doc_maps.drop(1)
124
- # Compute word counts per section
125
- xms.each do |m|
126
- if (m[:type] == :para)
127
- doc_ctr += m[:words]
128
- section_ctr += m[:words]
129
- else
130
- @sections << [section_name,section_ctr,section_level,section_type]
131
- section_name = m[:title]
132
- section_ctr = 0
133
- section_level = m[:level]
134
- section_type = m[:name]
135
- end
136
- end
137
- @sections << [section_name,section_ctr,section_level,section_type]
138
- # Put the document word count near the document type
139
- @sections[0][1] = doc_ctr
140
- @sections
54
+ def self.require_all_libs_relative_to( fname, dir = nil )
55
+ dir ||= ::File.basename(fname, '.*')
56
+ search_me = ::File.expand_path(
57
+ ::File.join(::File.dirname(fname), dir, '**', '*.rb'))
58
+
59
+ Dir.glob(search_me).sort.each {|rb| require rb}
141
60
  end
142
61
 
143
- end
62
+ end # module DocbookStatus
63
+
64
+ DocbookStatus.require_all_libs_relative_to(__FILE__)
@@ -0,0 +1,108 @@
1
+ # -*- encoding:utf-8 -*-
2
+
3
+ require 'yaml'
4
+ module DocbookStatus
5
+
6
+ # Manages the history of writing progress in two modes. In session
7
+ # or daemon mode the history shows progress for the user session. In
8
+ # normal mode the history is only maintained for calendar days,
9
+ # weeks, months.
10
+ #
11
+ # The writing progress can (but need not) be measured with these optional
12
+ # items:
13
+ # * start date (date of initialization)
14
+ # * scheduled end date
15
+ # * total word count goal
16
+ # * daily word count goal
17
+ #
18
+ # * file name
19
+ # * goal total
20
+ # * goal daily
21
+ # * start date
22
+ # * planned end date
23
+ # current entries
24
+ # * timestamp
25
+ # * word count
26
+ # archive entries
27
+ # * date
28
+ # * start
29
+ # * end
30
+ # * min
31
+ # * max
32
+ # * ctr (number of entries for the day)
33
+ #
34
+ class History
35
+
36
+ # History file, YAML format
37
+ HISTORY_FILE = 'dbs_work.yml'
38
+
39
+ # Does the history file exist?
40
+ def self.exists?()
41
+ File.exists?(HISTORY_FILE)
42
+ end
43
+
44
+ # Load the existing writing history
45
+ def initialize(name,end_planned=nil,goal_total=0,goal_daily=0)
46
+ if File.exists?(HISTORY_FILE)
47
+ @history = YAML.load_file(HISTORY_FILE)
48
+ else
49
+ @history = {:file => name,
50
+ :goal => {
51
+ :start => Date.today,
52
+ :end => end_planned,
53
+ :goal_total => goal_total,
54
+ :goal_daily => goal_daily},
55
+ :current => [],
56
+ :archive => {}}
57
+ end
58
+ end
59
+
60
+ def planned_end(date)
61
+ @history[:goal][:end]=date
62
+ end
63
+
64
+ def total_words(tw)
65
+ @history[:goal][:goal_total]=tw
66
+ end
67
+
68
+ def daily_words(tw)
69
+ @history[:goal][:goal_daily]=tw
70
+ end
71
+
72
+ # Add to the history
73
+ def add(ts,word_count)
74
+ # FIXME add daemon mode
75
+ #@history[:current] << progress
76
+ #archive
77
+ k = ts.to_date
78
+ unless (@history[:archive][k].nil?)
79
+ @history[:archive][k][:min] = word_count if @history[:archive][k][:min] > word_count
80
+ @history[:archive][k][:max] = word_count if @history[:archive][k][:max] < word_count
81
+ @history[:archive][k][:end] = word_count
82
+ @history[:archive][k][:ctr] += 1
83
+ else
84
+ @history[:archive][k] = {:min => word_count, :max => word_count, :start => word_count, :end => word_count, :ctr => 1}
85
+ end
86
+ end
87
+
88
+ # Is there already a history?
89
+ def history?
90
+ @history[:archive].length != 0
91
+ end
92
+
93
+ # Convenience - returns the statistics for today
94
+ def today
95
+ @history[:archive][Date.today]
96
+ end
97
+
98
+ # Return the goals
99
+ def goals
100
+ @history[:goal]
101
+ end
102
+
103
+ # Save the writing history
104
+ def save
105
+ File.open(HISTORY_FILE, 'w') {|f| YAML.dump(@history,f)}
106
+ end
107
+ end
108
+ end
@@ -0,0 +1,258 @@
1
+ # -*- encoding:utf-8 -*-
2
+
3
+ require 'xml'
4
+ module DocbookStatus
5
+
6
+ # Analyzes DocBook 5 documents for document structure (sections) and text length.
7
+ #
8
+ class Status
9
+
10
+ # The DocBook 5 namespace URL
11
+ #
12
+ DOCBOOK_NS = 'http://docbook.org/ns/docbook'
13
+
14
+ # The XInclude namespace URL
15
+ #
16
+ XINCLUDE_NS = 'http://www.w3.org/2001/XInclude'
17
+
18
+ # Elements whose content is counted as text. The _formalpara_
19
+ # elements are included implicitly because they contain _para_ child
20
+ # elements.
21
+ #
22
+ @@text_elements = ['para','simpara']
23
+
24
+ # Section elements, following the list given in http://docbook.org/tdg5/en/html/ch02.html#roots
25
+ # except for the refsect... elements.
26
+ #
27
+ @@section_elements = %w[
28
+ acknowledgements appendix article
29
+ bibliography book
30
+ chapter colophon
31
+ dedication
32
+ glossary
33
+ index
34
+ part preface
35
+ section sect1 sect2 sect3 sect4 set simplesect
36
+ toc
37
+ ]
38
+
39
+ attr_reader :doc
40
+
41
+ def initialize(fname=nil)
42
+ @sections = []
43
+ @remarks = []
44
+ @source = fname
45
+ @source_dir = fname.nil? ? nil : File.dirname(fname)
46
+ @source_file = fname.nil? ? nil : File.basename(fname)
47
+ @doc = nil
48
+ XML.default_line_numbers=true
49
+ end
50
+
51
+
52
+ # Return the remark-elements found in the document. If _keyword_ is
53
+ # nil then return all remarks, else only the ones with the right
54
+ # keyword.
55
+ #
56
+ def remarks(keyword=nil)
57
+ if keyword.nil?
58
+ @remarks
59
+ else
60
+ ukw = keyword.upcase
61
+ @remarks.find_all {|r| r[:keyword] == (ukw)}
62
+ end
63
+ end
64
+
65
+ # Counts the words in the contents of the given node. _Word_ in this
66
+ # context means something that is delimited by _space_ characters and starts with
67
+ # _word_ characters (in the regexp sense).
68
+ #
69
+ def count_words(node)
70
+ words = node.content.strip.split(/[[:space:]]+/).find_all {|w| w =~ /\w+/}
71
+ words.size
72
+ end
73
+
74
+ # Counts the words in the contents of the given node.
75
+ # It is assumed that the node is a kind of pure content (a paragraph) and therefore everything in it
76
+ # should be included in the word count. An exception to this are
77
+ # _remark_ elements, which are considered as comments, not meant for final publication.
78
+ #
79
+ def count_content_words(node)
80
+ ws = count_words(node)
81
+ # Count the remark text contained in the paragraph and subtract it from the real thing
82
+ wsr = node.find('db:remark').reduce(0) {|m,r| m+count_words(r)}
83
+ ws - wsr
84
+ end
85
+
86
+ # Find the _title_ of the current section. That element is either
87
+ # directly following or inside an _info_ element. Return the empty
88
+ # string if no title can be found.
89
+ #
90
+ def find_section_title(node)
91
+ title = node.find_first('./db:title')
92
+ if title.nil?
93
+ title = node.find_first './db:info/db:title'
94
+ end
95
+ if title.nil?
96
+ ""
97
+ else
98
+ title.content
99
+ end
100
+ end
101
+
102
+ # Check the document elements for content and type recursively,
103
+ # starting at the current node. Returns an array with paragraph and
104
+ # section maps.
105
+ #
106
+ def check_node(node, level, ctr)
107
+ if (@@text_elements.include? node.name)
108
+ ctr << {:type => :para, :level => level, :words => count_content_words(node)}
109
+ elsif (@@section_elements.include? node.name)
110
+ title = find_section_title(node)
111
+ ctr << {:type => :section, :level => level, :title => title, :name => node.name}
112
+ node.children.each {|inner_elem| check_node(inner_elem, level+1, ctr)} if node.children?
113
+ else
114
+ node.children.each {|inner_elem| check_node(inner_elem, level+1, ctr)} if node.children?
115
+ end
116
+
117
+ ctr
118
+ end
119
+
120
+ # Check whether the document has a DocBook default namespace
121
+ def is_docbook?(doc)
122
+ dbns = doc.root.namespaces.default
123
+ (!dbns.nil? && (dbns.href.casecmp(DOCBOOK_NS) == 0))
124
+ end
125
+
126
+ # Check whether the document has an XInclude namespace
127
+ def has_xinclude?(doc)
128
+ ret = false
129
+ doc.root.namespaces.each do |ns|
130
+ if (ns.href.casecmp(XINCLUDE_NS) == 0)
131
+ ret = true
132
+ break
133
+ end
134
+ end
135
+ ret
136
+ end
137
+
138
+ # Finds and returns all XInclude files/URLs in a document.
139
+ #
140
+ # OPTIMIZE implement xpointer and fallback handling for
141
+ # xi:include? see http://www.w3.org/TR/xinclude/
142
+ #
143
+ def find_xincludes(doc)
144
+ if has_xinclude?(doc)
145
+ xincs = doc.find('//xi:include', "xi:"+XINCLUDE_NS)
146
+ xfiles = xincs.map {|x| x.attributes['href'] }
147
+ (xfiles << xfiles.map {|xf|
148
+ xfn = File.exists?(xf) ? xf : File.expand_path(xf,File.dirname(doc.root.base_uri))
149
+ xdoc = XML::Document.file(xfn)
150
+ find_xincludes(xdoc)
151
+ }).flatten
152
+ else
153
+ []
154
+ end
155
+ end
156
+
157
+ # Find all remark elements in the document and return a map for
158
+ # every such element. The map contains:
159
+ #
160
+ # * keyword: if the first word of the content is uppercase that is the keyword, else _REMARK_
161
+ # * text: the content of the remark element, minus the keyword
162
+ # * file: the name of the source file
163
+ # * line: the line number in the source file
164
+ #
165
+ # OPTIMIZE look for 'role' attributes as keywords?
166
+ #
167
+ def find_remarks_in_doc(doc,source)
168
+ rems = doc.find('//db:remark')
169
+ rems.map {|rem|
170
+ c = rem.content.strip
171
+ kw = 'REMARK'
172
+ if rem.first.text?
173
+ kw1 = c.match('^([[:upper:]]+)([[:space:][:punct:]]|$)')
174
+ unless kw1.nil?
175
+ kw = kw1[1]
176
+ c = kw1.post_match.lstrip
177
+ end
178
+ end
179
+ # TODO integrate XPath? :path => rem.path, :parent => rem.parent.path,
180
+ {:keyword => kw, :text => c, :file=>source, :line => rem.line_num}
181
+ }
182
+ end
183
+
184
+ # Finds the remarks by looking through all the Xincluded files
185
+ #
186
+ def find_remarks(filter=[])
187
+ if (@source.nil?)
188
+ rfiles = find_xincludes(@doc)
189
+ else
190
+ @doc = XML::Document.file(@source)
191
+ rfiles = [@source_file] + find_xincludes(@doc)
192
+ end
193
+ @remarks = rfiles.map {|rf|
194
+ ind = XML::Document.file(File.expand_path(rf,@source.nil? ? '.' : @source_dir))
195
+ ind.root.namespaces.default_prefix = 'db'
196
+ rems = find_remarks_in_doc(ind, rf)
197
+ rems
198
+ }.flatten
199
+ if (filter.empty?)
200
+ @remarks
201
+ else
202
+ filter.map {|f|
203
+ @remarks.find_all {|r| f.casecmp(r[:keyword]) == 0}
204
+ }.flatten
205
+ end
206
+ end
207
+
208
+ # Searches the XML document for sections and word counts. Returns an
209
+ # array of sections (map) with title, word count, section level and DocBook tag.
210
+ #
211
+ def analyze_document(doc)
212
+ # Add a namespace declaration for XPath expressions
213
+ doc.root.namespaces.default_prefix = 'db'
214
+ # Analyze the document starting with the root node
215
+ doc_maps = check_node(doc.root,0,[])
216
+ @sections = []
217
+ section_name = doc_maps[0][:title]
218
+ section_type = doc_maps[0][:name]
219
+ section_ctr = 0
220
+ section_level = 0
221
+ doc_ctr = 0
222
+ #puts doc_maps.inspect
223
+ xms = doc_maps.drop(1)
224
+ # Compute word counts per section
225
+ xms.each do |m|
226
+ if (m[:type] == :para)
227
+ doc_ctr += m[:words]
228
+ section_ctr += m[:words]
229
+ else
230
+ @sections << {:title => section_name, :words => section_ctr, :level => section_level, :tag => section_type}
231
+ section_name = m[:title]
232
+ section_ctr = 0
233
+ section_level = m[:level]
234
+ section_type = m[:name]
235
+ end
236
+ end
237
+ @sections << {:title => section_name, :words => section_ctr, :level => section_level, :tag => section_type}
238
+ # Put the document word count near the document type
239
+ @sections[0][:words] = doc_ctr
240
+ @sections
241
+ end
242
+
243
+ # Open the XML document, check for the DocBook5 namespace and finally
244
+ # apply XInclude treatment to it, if it has an XInclude namespace.
245
+ # Returns a map with the file name, the file's change time (File.ctime), and the section structure.
246
+ #
247
+ def analyze_file
248
+ full_name = File.expand_path(@source)
249
+ changed = File.ctime(@source)
250
+ @doc = XML::Document.file(@source)
251
+ raise ArgumentError, "Error: #{@source} is apparently not DocBook 5." unless is_docbook?(@doc)
252
+ @doc.xinclude if has_xinclude?(@doc)
253
+ sections = analyze_document(@doc)
254
+ {:file => full_name, :modified => changed, :sections => sections}
255
+ end
256
+
257
+ end
258
+ end