PyPI - passagemath-sws2rst - Versions diffs - 10.4.1__py3-none-any.whl - Mend

passagemath-sws2rst 10.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

passagemath_sws2rst-10.4.1.data/scripts/sage-sws2rst +198 -0
passagemath_sws2rst-10.4.1.dist-info/METADATA +20 -0
passagemath_sws2rst-10.4.1.dist-info/RECORD +9 -0
passagemath_sws2rst-10.4.1.dist-info/WHEEL +5 -0
passagemath_sws2rst-10.4.1.dist-info/top_level.txt +1 -0
sage_sws2rst/__init__.py +0 -0
sage_sws2rst/comments2rst.py +480 -0
sage_sws2rst/results2rst.py +171 -0
sage_sws2rst/worksheet2rst.py +203 -0

passagemath_sws2rst-10.4.1.data/scripts/sage-sws2rst ADDED Viewed

@@ -0,0 +1,198 @@
+#!python
+# -*- coding: utf-8 -*-
+r"""
+sage-sws2rst
+============
+Translate a Sage worksheet file (.sws) into an rst file. The result is
+saved in the current working directory.
+Usage::
+    sage --sws2rst [-h] <source sws file>
+Print the help message::
+    sage --sws2rst -h
+EXAMPLES::
+    sage --sws2rst file.sws
+AUTHORS:
+- Pablo Angulo (January 2011): Initial version
+- Karl-Dieter Crisman (June 2012): Documentation
+  and minor refinements
+- Karl-Dieter Crisman (November 2014): Correct use of temporary files,
+  see :trac:`17308`.
+"""
+#*****************************************************************************
+#       Copyright (C) 2011 Pablo Angulo
+#       Copyright (C) 2012-2014 Karl-Dieter Crisman
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+#                  https://www.gnu.org/licenses/
+#*****************************************************************************
+import sys
+import tarfile
+import os
+import shutil
+import codecs
+import tempfile
+from sage_sws2rst.worksheet2rst import worksheet2rst
+from optparse import OptionParser
+def process_sws(sws_file):
+    """
+    Process the ``.sws`` file ``sws_file`` and create an ``.rst`` file
+    (and possible media files) in the current working directory.
+    The worksheet (a bzip2-compressed tar archive) is unpacked into a
+    temporary directory, which is removed again even if the conversion
+    fails.
+    INPUT:
+    - ``sws_file`` -- string -- path of the worksheet archive
+    Raises ``RuntimeError`` if the archive does not contain a
+    ``sage_worksheet`` directory.
+    """
+    # Output names are derived from the archive name, with spaces
+    # replaced by underscores.
+    base_name = os.path.basename(os.path.splitext(sws_file)[0])
+    base_name = base_name.replace(' ','_')
+    tempdir = tempfile.mkdtemp()
+    try:
+        # The archive object gets its own name so that ``sws_file`` still
+        # holds the path when the error message below is formatted.
+        with tarfile.open(sws_file, mode='r:bz2') as archive:
+            if hasattr(tarfile, 'data_filter'):
+                # Worksheets may come from untrusted sources: let tarfile
+                # reject members that would land outside ``tempdir``.
+                archive.extractall(tempdir, filter='data')
+            else:
+                # Older Python without extraction filters.
+                archive.extractall(tempdir)
+        worksheet_dir = os.path.join(tempdir, 'sage_worksheet')
+        if not os.path.isdir(worksheet_dir):
+            raise RuntimeError("Worksheet file %r does not contain a 'sage_worksheet' directory" % sws_file)
+        process_worksheet(worksheet_dir, base_name)
+    finally:
+        shutil.rmtree(tempdir)
+def process_worksheet(worksheet_dir, base_name):
+    """
+    Process the extracted worksheet directory ``worksheet_dir`` and
+    create the ``.rst`` and media files with base name ``base_name``.
+    Files are moved from ``worksheet_dir``, so make sure these are
+    temporary files!
+    INPUT:
+    - ``worksheet_dir`` -- string -- directory containing ``worksheet.html``
+      and, optionally, ``data`` and ``cells`` subdirectories
+    - ``base_name`` -- string -- the output is written to
+      ``<base_name>.rst`` and ``<base_name>_media`` in the current
+      working directory
+    """
+    #Images
+    images_dir = base_name + '_media'
+    # An existing media directory is reused; anything else in the way
+    # (for instance a regular file of that name) raises OSError.
+    os.makedirs(images_dir, exist_ok=True)
+    #"data" dir
+    data_path = os.path.join(worksheet_dir, 'data')
+    if os.path.isdir(data_path):
+        for image in os.listdir(data_path):
+            shutil.move(os.path.join(data_path, image), os.path.join(images_dir, image.replace(' ','_')))
+    #cells
+    cells_path = os.path.join(worksheet_dir, 'cells')
+    if os.path.isdir(cells_path):
+        for cell in os.listdir(cells_path):
+            cell_path = os.path.join(cells_path, cell)
+            if not os.path.isdir(cell_path):
+                # A stray file next to the cell directories has nothing
+                # to collect (and os.listdir would fail on it).
+                continue
+            for image in os.listdir(cell_path):
+                image_path = os.path.join(cell_path, image)
+                # Only regular files are moved. Subdirectories (such as
+                # Jmol's '.jmol_images') are not handled yet.
+                if os.path.isfile(image_path):
+                    shutil.move(image_path,
+                                os.path.join(images_dir, 'cell_%s_%s'%(cell,image)))
+    #read html file, parse it, write rst file
+    html_file = os.path.join(worksheet_dir, 'worksheet.html')
+    with codecs.open(html_file, mode='r', encoding='utf-8') as f:
+        html_text = f.read()
+    rst_text = worksheet2rst(html_text, images_dir=images_dir)
+    rst_file = base_name + '.rst'
+    with codecs.open(rst_file, mode='w', encoding='utf-8') as out_file:
+        out_file.write(rst_text)
+    print("File at", rst_file)
+    print("Image directory at", images_dir)
+# Set the parser
+usage = r"""
+    sage --sws2rst [options]  <source sws file> ...
+Translate a Sage worksheet file (.sws) into an reStructuredText
+(.rst) file.  At least one sws file argument is required; all sws
+files will be parsed and translated.  Spaces in the names of the
+worksheet will be converted to underscores. The resulting files will
+be stored in the current working directory.
+Examples:
+    sage --sws2rst file.sws
+    sage --sws2rst file1.sws file2.sws file3.sws
+    sage --sws2rst -h # this help message prints
+    sage --sws2rst --sphinxify # information about how to use
+                               # Sphinx to compile your rst file
+"""
+sphinxify_text = r"""
+Once you have made your rst file, what can you do with it?
+If this is a file which is likely to become part of the Sage
+standard documentation, you will want to edit the appropriate
+file in $SAGE_ROOT/src/doc to include your file, or
+simply include your file as appropriate.
+However, you may simply want to make great-looking documentation
+for some other purpose out of your worksheet.  The following
+steps are one way to do so.
+ - Assume that the generated .rst file is ``My_Project.rst``.
+ - Make a folder somewhere convenient to compile in, say, ``MyProject``.
+ - Then move your .rst file into that folder, and cd into it.
+ - Now the key is to use Sage's shell to run Sphinx on it! Run ``sage --sh``.
+ - Then type ``sphinx-quickstart`` and follow the instructions in the
+   Sphinx tutorial [1]_. You will probably want to choose to render math
+   with MathJax [2]_, but you can accept the defaults for the other options.
+ - Finally, edit ``index.rst`` by adding ``My_Project`` in the table of
+   contents, as detailed in the Sphinx tutorial [3]_.
+ - If you now type ``make html`` you should get a beautiful-looking web page
+   in ``_build/html``. If you did not have a header at the top of your worksheet,
+   you may get an error, but you can ignore this.
+REFERENCES:
+.. [1] Getting Started,
+   https://www.sphinx-doc.org/en/master/usage/quickstart.html
+.. [2] MathJax,
+   http://www.mathjax.org/
+.. [3] Defining Document Structure, Getting Started,
+   https://www.sphinx-doc.org/en/master/usage/quickstart.html#defining-document-structure"""
+# Build the command line parser; the only option besides the built-in
+# help is --sphinxify.
+parser = OptionParser(usage=usage)
+parser.add_option("--sphinxify",
+                  action="store_true", dest="sphinxify",
+                  help="Print information about how to use Sphinx to compile your rst file, then exit.")
+(options, args) = parser.parse_args()
+# --sphinxify only prints the Sphinx instructions; no worksheet is converted.
+if options.sphinxify:
+    print(sphinxify_text)
+    sys.exit(0)
+# At least one worksheet file is required; otherwise print the usage
+# text and exit with status 1.
+if len(args) < 1:
+    parser.print_usage()
+    sys.exit(1)
+# Convert every worksheet given on the command line, in order.
+for file_name in args:
+    print("Processing", file_name)
+    process_sws(file_name)

passagemath_sws2rst-10.4.1.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,20 @@
+Metadata-Version: 2.1
+Name: passagemath-sws2rst
+Version: 10.4.1
+Summary: passagemath: SageNB worksheet converter
+Author-email: The Sage Developers <sage-support@googlegroups.com>
+License: GNU General Public License (GPL) v3 or later
+Project-URL: Homepage, https://www.sagemath.org
+Description-Content-Type: text/x-rst
+sage_sws2rst: Translate legacy Sage worksheet files (.sws) to reStructuredText (.rst) files
+===========================================================================================
+Description
+-----------
+Provides a script `sage-sws2rst`, which translates a Sage worksheet file (.sws) into a reStructuredText (.rst) file.
+Sage worksheet files (.sws) are a file format that was used by the now-obsolete Sage notebook (https://github.com/sagemath/sagenb), superseded by the Jupyter notebook.  SageNB was dropped in the course of the transition of SageMath to Python 3.
+This package was extracted from the SageNB sources in https://github.com/sagemath/sage/issues/28838 to provide a way to convert pedagogical material that is available in Sage worksheet format.

passagemath_sws2rst-10.4.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,9 @@
+passagemath_sws2rst-10.4.1.data/scripts/sage-sws2rst,sha256=Cs5Wm2qtiZ9cGiOYXrvxttYlV4vzQD8DSzHCUSe5d2U,6869
+sage_sws2rst/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+sage_sws2rst/comments2rst.py,sha256=C0HUDmY8OOhisZSk5d6WPstfY8Uw-nBM6X3gFSw-M1o,15609
+sage_sws2rst/results2rst.py,sha256=Mu06rryTWeQ7C9QeEtcN88teUkmDfXR0iRJcpYljkgI,6046
+sage_sws2rst/worksheet2rst.py,sha256=smRjUEttzUXWVIephNcjq54yIH0cfcCkJEmPJNgtGjM,5987
+passagemath_sws2rst-10.4.1.dist-info/METADATA,sha256=LVdNYT3faDQlz_QhXYwDDaspWRYU07aXow_f8w0Res0,1086
+passagemath_sws2rst-10.4.1.dist-info/WHEEL,sha256=Mdi9PDNwEZptOjTlUcAth7XJDFtKrHYaQMPulZeBCiQ,91
+passagemath_sws2rst-10.4.1.dist-info/top_level.txt,sha256=NXbIX8bi906EaXF0_SYm-oymqgGkZRQ_laYlHnr-2rc,13
+passagemath_sws2rst-10.4.1.dist-info/RECORD,,

passagemath_sws2rst-10.4.1.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (73.0.1)
+Root-Is-Purelib: true
+Tag: py3-none-any

passagemath_sws2rst-10.4.1.dist-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ sage_sws2rst

sage_sws2rst/__init__.py ADDED Viewed

File without changes

sage_sws2rst/comments2rst.py ADDED Viewed

@@ -0,0 +1,480 @@
+# -*- coding: utf-8 -*-
+r"""
+Convert html from text cells in the notebook into ReStructuredText
+This is called by sws2rst
+- Pablo Angulo Ardoy (2011-02-25): initial version
+"""
+#**************************************************
+# Copyright (C) 2011 Pablo Angulo
+#
+# Distributed under the terms of the GPL License
+#**************************************************
+import re
+import os
+try:
+    from bs4 import (BeautifulSoup, Tag,
+                     CData, Comment, Declaration, ProcessingInstruction)
+except ImportError:
+    raise ImportError("""BeautifulSoup must be installed.
+Please either install using
+    sage -pip install beautifulsoup4
+""")
+#negative lookbehind: http://www.regular-expressions.info/lookaround.html
+double_dollar = re.compile(r'(?<!\\)\$\$')
+def preprocess_display_latex(text):
+    r"""Turn every ``$$...$$`` span of ``text`` into ``<display>...</display>``.
+    This runs before the soup is built. Paragraph and line-break tags
+    found inside the math are removed; if exactly one ``<p>`` or ``</p>``
+    was swallowed that way, it is put back right after ``</display>`` so
+    that markup such as ``$$<p>display_latex$$</p>`` stays balanced.
+    A ``$$`` preceded by a backslash is not treated as a delimiter.
+    Raises ``Exception`` when the paragraph tags inside one formula are
+    off balance by more than one.
+    EXAMPLES::
+        >>> from sage_sws2rst.comments2rst import preprocess_display_latex
+        >>> s="$$a=2$$"
+        >>> preprocess_display_latex(s)
+        '<display>a=2</display>'
+        >>> s="<p>$$a=2$$</p>"
+        >>> preprocess_display_latex(s)
+        '<p><display>a=2</display></p>'
+        >>> s="<p>$$a=2</p>$$"
+        >>> preprocess_display_latex(s)
+        '<p><display>a=2</display></p>'
+        >>> s="$$<p>a=2</p>$$"
+        >>> preprocess_display_latex(s)
+        '<display>a=2</display>'
+    """
+    # Splitting on the delimiter alternates between text outside
+    # (even positions) and inside (odd positions) display math.
+    *chunks, tail = double_dollar.split(text)
+    pieces = []
+    for position, chunk in enumerate(chunks):
+        if position % 2 == 0:
+            pieces.extend((chunk, '<display>'))
+            continue
+        body, balance = prune_tags(chunk)
+        pieces.extend((body, '</display>'))
+        if abs(balance) > 1:
+            raise Exception('display latex was messed up with html code')
+        if balance == 1:
+            pieces.append('<p>')
+        elif balance == -1:
+            pieces.append('</p>')
+    pieces.append(tail)
+    return ''.join(pieces)
+def prune_tags(text):
+    """Remove ``<p>``, ``</p>``, ``<br/>`` and ``<br />`` from ``text``.
+    OUTPUT:
+    - pair ``(cleaned text, count)`` where ``count`` is the number of
+      ``<p>`` minus the number of ``</p>`` found in the input
+    """
+    balance = text.count('<p>') - text.count('</p>')
+    for tag in ('<br/>', '<br />', '<p>', '</p>'):
+        text = text.replace(tag, '')
+    return text, balance
+# Characters that are backslash-escaped in plain text (see escape_chars).
+escapable_chars = { '+' :r'\+',
+                    '*' :r'\*',
+                    '|' :r'\|',
+                    '-' :r'\-'}
+def escape_chars(text):
+    """Return ``text`` with every character listed in ``escapable_chars``
+    replaced by its backslash-escaped form.
+    """
+    return text.translate(str.maketrans(escapable_chars))
+#This is supposed to be handled by BeautifulSoup, but doesn't work
+xml_entities = {'&lt;':'<',
+            '&gt;':'>',
+            '&amp;':'&',
+            '&quot;':'"',
+            '&apos;':"'",
+}
+# One alternation over all entities, so that they are decoded in a single pass.
+_xml_entity_re = re.compile('|'.join(re.escape(entity) for entity in xml_entities))
+def replace_xml_entities(text):
+    r"""Replace the entities listed in ``xml_entities`` by their characters.
+    Every entity is decoded exactly once: the ``&`` produced by decoding
+    ``&amp;`` is never combined with the text that follows it, so
+    ``&amp;quot;`` becomes ``&quot;`` and not ``"``.
+    INPUT:
+    - ``text`` -- string
+    OUTPUT:
+    - string -- ``text`` with the entities decoded
+    """
+    return _xml_entity_re.sub(lambda match: xml_entities[match.group(0)], text)
+def replace_courier(soup):
+    """Replace every tag whose ``style`` attribute mentions ``courier``
+    by a ``<code>`` tag holding the same children.
+    Lacking a better option, I use courier font to mark <code>
+    within tinyMCE. And I want to turn that into real code tags.
+    Most users won't be needing this(?), so this code is not called anywhere
+    but kept for reference
+    INPUT:
+    - ``soup`` -- the BeautifulSoup tree, modified in place
+    """
+    # has_attr looks at the tag's attributes; ``'style' in tag`` would
+    # search its children instead.
+    for t in soup.find_all(lambda s: s.has_attr('style') and 'courier' in s['style']):
+        tag = soup.new_tag('code')
+        # append() moves the child out of ``t``, so this loop empties it
+        while t.contents:
+            tag.append(t.contents[0])
+        t.replace_with(tag)
+#negative lookbehind: http://www.regular-expressions.info/lookaround.html
+single_dollar = re.compile(r'(?<!\\)\$')
+def replace_latex(soup):
+    r"""Replaces inline latex by :math:`code` and escapes
+    some rst special chars like +, -, * and | outside of inline latex
+    does not escape chars inside display or pre tags
+    INPUT:
+    - ``soup`` -- the BeautifulSoup tree, modified in place
+    EXAMPLES::
+        >>> from sage_sws2rst.comments2rst import replace_latex
+        >>> from bs4 import BeautifulSoup
+        >>> soup = r"<p>Some <strong>latex: $e^\pi i=-1$</strong></p>"
+        >>> s = BeautifulSoup(soup, features='html.parser')
+        >>> replace_latex(s)
+        >>> s
+        <p>Some <strong>latex: :math:`e^\pi i=-1`</strong></p>
+    ::
+        >>> soup = "<p><strong>2+2 | 1+3</strong></p>"
+        >>> s = BeautifulSoup(soup, features='html.parser')
+        >>> replace_latex(s)
+        >>> s
+        <p><strong>2\+2 \| 1\+3</strong></p>
+    """
+    # find_all returns a list, so replacing nodes while looping is safe
+    for t in soup.find_all(string=re.compile('.+')):
+        if t.find_parent(['display', 'pre']) is not None:
+            continue
+        # Splitting on unescaped '$' alternates plain text (even
+        # positions) and inline math (odd positions).
+        pieces = []
+        for position, part in enumerate(single_dollar.split(t)):
+            if position % 2:
+                pieces.append(' :math:`%s`' % part)
+            else:
+                pieces.append(escape_chars(part))
+        t.replace_with(''.join(pieces))
+class Soup2Rst(object):
+    """Builds the rst text from the Soup Tree.
+    ``tags`` maps an HTML tag name to the suffix of the ``visit_*`` method
+    that renders it, and ``headers`` maps a heading tag to the character
+    used to underline it. Nodes whose tag is not listed in ``tags`` are
+    rendered through ``str``.
+    INPUT:
+    - ``images_dir`` -- string -- folder prepended to image file names
+    """
+    tags = {'h1':'header',
+            'h2':'header',
+            'h3':'header',
+            'h4':'header',
+            'h5':'header',
+            'h6':'header',
+            'p': 'p',
+            '[document]': 'document',
+            'address': 'em',
+            'br': 'br',
+            'b':'strong',
+            'strong':'strong',
+            'em':'em',
+            'pre':'pre',
+            'code':'code',
+            'display':'display',
+            'span':'inline_no_tag',
+            'ul':'ul',
+            'ol':'ol',
+            'li':'li',
+            'a':'a',
+            'table':'table',
+            # 'tr' has no entry: rows are consumed by visit_table
+            'td':'inline_no_tag',
+            'th':'inline_no_tag',
+            'tt':'inline_no_tag',
+            'div':'block_no_tag',
+            'img':'img',
+            }
+    headers = {'h1':'=',
+               'h2':'-',
+               'h3':'^',
+               'h4':'"',
+               'h5':'~',
+               'h6':'*',
+               }
+    def __init__(self, images_dir):
+        self.images_dir = images_dir
+        # nesting depth of the list being rendered; -1 outside of any list
+        self._nested_list = -1
+        # one entry per open list: True for <ol>, False for <ul>
+        self._inside_ol_or_ul = []
+        self._inside_code_tag = False
+    def visit(self, node):
+        """Return the rst text for ``node``, dispatching on its tag name."""
+        if isinstance(node, (CData, Comment, Declaration, ProcessingInstruction)):
+            return ''
+        elif hasattr(node, 'name') and node.name in self.tags:
+            method = 'visit_' + self.tags[node.name]
+            visitor = getattr(self, method)
+            return visitor(node)
+        else:
+            #Assume plain string
+            return str(node).replace('\n','')
+    def visit_document(self, node):
+        return '\n'.join(self.visit(tag) for tag in node.contents)
+    def get_plain_text(self, node):
+        """Gets all text, removing all tags"""
+        if hasattr(node, 'contents'):
+            t = ' '.join(self.get_plain_text(tag) for tag in node.contents)
+        else:
+            t = str(node)
+        return t.replace('\n','')
+    def visit_header(self, node):
+        """Render a heading as its text underlined with the level's character."""
+        s = ''.join(self.visit(tag) for tag in node.contents)
+        spacer = self.headers[node.name]*len(s)
+        return s.replace( '\n', '') +  '\n' + spacer
+    def visit_pre(self, node):
+        """Render preformatted text as an indented literal block."""
+        # decode_contents() is the markup between the opening and closing
+        # tags, whatever attributes the <pre> tag carries.
+        inner = node.decode_contents()
+        for br in ('<br/>', '<br />', '<br></br>'):
+            inner = inner.replace(br, '\n')
+        return '::\n\n    ' + inner.replace('\n','\n    ')
+    def _visit_list(self, node, ordered):
+        """Render the items of a list; ``ordered`` selects '#.' over '-'."""
+        self._nested_list += 1
+        self._inside_ol_or_ul.append(ordered)
+        try:
+            return '\n\n'+''.join(self.visit(tag) for tag in node.contents)+'\n'
+        finally:
+            # leave the list even if a child failed to render
+            self._inside_ol_or_ul.pop()
+            self._nested_list -= 1
+    def visit_ul(self, node):
+        return self._visit_list(node, False)
+    def visit_ol(self, node):
+        return self._visit_list(node, True)
+    def visit_li(self, node):
+        # An <li> met outside of any list is rendered as a plain bullet.
+        ordered = bool(self._inside_ol_or_ul) and self._inside_ol_or_ul[-1]
+        return (' '*self._nested_list
+                + ('#. ' if ordered else '- ')
+                +' '.join(self.visit(tag) for tag in node.contents)
+                + '\n')
+    def visit_display(self, node):
+        """Render display math as a ``.. MATH::`` directive."""
+        # 9 == len('<display>') and 10 == len('</display>'); these tags are
+        # written without attributes by preprocess_display_latex.
+        inner = str(node)[9:-10]
+        for br in ('<br/>', '<br></br>'):
+            inner = inner.replace(br, '\n')
+        return ('\n\n.. MATH::\n\n    ' +
+                inner.replace('\n','\n    ') +
+                '\n\n.. end of math\n\n')
+    def visit_img(self, node):
+        src = node.get('src')
+        if not src:
+            # nothing to point the directive at
+            return ''
+        return '.. image:: ' + os.path.join(self.images_dir, src.replace(' ','_')) + '\n    :align: center\n'
+    def visit_table(self,node):
+        """Render a table as an rst simple table.
+        An empty row stands for the separator that follows the ``thead``
+        rows. A table without any cell yields the empty string.
+        """
+        rows = []
+        for elt in node.contents:
+            name = getattr(elt, 'name', None)
+            if name in ('thead', 'tbody', 'tfoot'):
+                rows.extend(self.prepare_tr(row)
+                            for row in elt
+                            if getattr(row, 'name', None) == 'tr')
+                if name == 'thead':
+                    rows.append([]) #this row represents a separator
+            elif name == 'tr':
+                rows.append(self.prepare_tr(elt))
+        if not any(rows):
+            return ''
+        # pad short rows so that every row has the same number of cells
+        ncols = max(len(row) for row in rows)
+        for row in rows:
+            if len(row) < ncols:
+                row.extend( ['']*(ncols - len(row)))
+        cols_sizes = [max(len(td) for td in tds_in_col)
+                      for tds_in_col in zip(*rows)]
+        border = ' '.join('='*c for c in cols_sizes)
+        result = [border]
+        for row in rows:
+            if any(td for td in row):
+                result.append(' '.join(td+' '*(width - len(td))
+                                       for width,td in zip(cols_sizes,row)))
+            else:
+                result.append(' '.join('-'*c for c in cols_sizes))
+        result.append(border)
+        return '\n'.join(result)
+    def prepare_tr(self, node):
+        """Return the rendered cells of a table row as a list of strings."""
+        return [self.visit(tag) for tag in node.contents if tag!='\n']
+    def visit_br(self, node):
+        return '\n\n'
+    def _visit_emphasis(self, node, marker):
+        """Wrap the rendered children of ``node`` in ``marker``.
+        Empty content gives the empty string, and content holding an
+        inline literal is returned without markers.
+        """
+        content = ' '.join(self.visit(tag) for tag in node.contents).strip()
+        if not content:
+            return ''
+        if '``' in content:
+            return content
+        # surrounding spaces keep the markup valid next to other text
+        return ' ' + marker + content + marker + ' '
+    def visit_strong(self, node):
+        return self._visit_emphasis(node, '**')
+    def visit_em(self,node):
+        return self._visit_emphasis(node, '*')
+    def visit_code(self, node):
+        if node.contents:
+            content = self.get_plain_text(node).strip()
+            return '``' + content + '``'
+        else:
+            return ''
+    def visit_inline_no_tag(self, node):
+        return (' '.join(self.visit(tag)
+                         for tag in node.contents)).strip()
+    def visit_block_no_tag(self, node):
+        return '\n'.join(self.visit(tag) for tag in node.contents) + '\n'
+    def visit_p(self, node):
+        return ''.join(self.visit(tag) for tag in node.contents) + '\n\n'
+    def visit_a(self, node):
+        """Render a link, an internal reference or an anchor.
+        - ``href`` starting with ``#`` gives a ``:ref:`` role
+        - any other non-empty ``href`` gives an external link
+        - without ``href``, a ``name`` attribute gives a target definition
+        - otherwise only the rendered children are returned
+        """
+        c = ' '.join(self.visit(tag) for tag in node.contents)
+        link = node.get('href')
+        if link is None:
+            anchor = node.get('name')
+            if anchor is None:
+                return c
+            return '.. _%s:\n\n'%anchor
+        if not link:
+            return c
+        if link.startswith('#'):
+            return ':ref:`%s <%s>`'%(c, link[1:])
+        return '`%s <%s>`_'%(c, link)
+def html2rst(text, images_dir):
+    r"""
+    Convert html, typically generated by tinyMCE, into rst
+    compatible with Sage documentation.
+    The main job is done by BeautifulSoup, which is much more
+    robust than conventional parsers like HTMLParser, but also
+    several details specific of this context are taken into
+    account, so this code differs from generic approaches like
+    those found on the web.
+    INPUT:
+    - ``text`` -- string -- a chunk of HTML text
+    - ``images_dir`` -- string -- folder where images are stored
+    OUTPUT:
+    - string -- rst text
+    EXAMPLES::
+        >>> from sage_sws2rst.comments2rst import html2rst
+        >>> text = r'<p>Some text with <em>math</em>: $e^{\pi i}=-1$</p>'
+        >>> html2rst(text, '')
+        'Some text with  *math* :  :math:`e^{\\pi i}=-1`\n\n'
+    ::
+        >>> text = '<p>Text with <em>incorrect</p> nesting</em>.'
+        >>> html2rst(text, '')
+        'Text with  *incorrect* \n\n nesting\n.'
+    ::
+        >>> text = '<pre>Preformatted: \n    a+2\n</pre><p> Not preformatted: \n    a+2\n</p>'
+        >>> html2rst(text, '')
+        '::\n\n    Preformatted: \n        a+2\n    \n Not preformatted:     a\\+2\n\n'
+    ::
+        >>> text = '&aacute;ñ&nbsp;&ntildeá'
+        >>> html2rst(text, '')
+        '\xe1\xf1 \xf1\xe1'
+    ::
+        >>> text = r'<p>some text</p><p>$$</p><p>3.183098861 \cdot 10^{-1}</p><p>$$</p>'
+        >>> html2rst(text, '')
+        'some text\n\n.. MATH::\n\n    3.183098861 \\cdot 10^{-1}\n\n.. end of math\n\n'
+    When the content is empty::
+        >>> html2rst("<strong></strong> ", '')
+        '\n '
+        >>> html2rst("<strong> </strong> ", '')
+        '\n '
+        >>> html2rst("<em></em> ", '')
+        '\n '
+        >>> html2rst("<em> </em> ", '')
+        '\n '
+    Spaces are added around *italic* or **bold** text (otherwise, it
+    may be invalid ReStructuredText syntax)::
+        >>> text = '<p><strong>Exercice.</strong>Let x be ...</p>'
+        >>> html2rst(text, '')
+        ' **Exercice.** Let x be ...\n\n'
+        >>> text = '<p><em>Exercice.</em>Let x be ...</p>'
+        >>> html2rst(text, '')
+        ' *Exercice.* Let x be ...\n\n'
+    Below is an example showing the translation from html to rst is not
+    always perfect.
+    Here the strong emphasis is on more than one line and is not properly
+    translated::
+        >>> text='<p>You will find a <em>while loop</em> helpful here. Below is a simple example:</p><p style="padding-left: 30px;"><strong>x = 0<br />while x &lt; 7:<br />&nbsp;&nbsp;&nbsp; x = x + 2<br />&nbsp;&nbsp;&nbsp; print x</strong></p>'
+        >>> html2rst(text, '')
+        'You will find a  *while loop*  helpful here. Below is a simple
+        example:\n\n **x = 0 \n\n while x < 7: \n\n     x = x \\+ 2 \n\n
+        print x** \n\n'
+    """
+    # Step 1: replace $$some display latex$$ with
+    # <display>some display latex</display> before parsing
+    text = preprocess_display_latex(text)
+    # Step 2: turn non-breaking space entities into plain spaces
+    text = text.replace('&nbsp;',' ')
+    # Step 3: parse with BeautifulSoup's 'html.parser' backend.
+    # NOTE(review): the comment here used to read "BeautifulSoup is better
+    # than BeautifulSoup for html that wasn't generated by humans (like
+    # tinyMCE)"; the first class name looks garbled -- confirm the intent.
+    soup = BeautifulSoup(text,
+                         features='html.parser',
+                         # https://stackoverflow.com/questions/11856011/beautifulsoup-has-no-attribute-html-entities
+                         ##convertEntities=BeautifulSoup.ALL_ENTITIES
+                         )
+    # Step 4: remove all comments from the tree
+    comments = soup.findAll(text=lambda text:isinstance(text, Comment))
+    for comment in comments:
+        comment.extract()
+    # replace_courier(soup) is deliberately not called (see its docstring)
+    # Step 5: inline math and escaping of rst special characters
+    replace_latex(soup)
+    # Step 6: walk the tree and build the rst text
+    v = Soup2Rst(images_dir)
+    text = v.visit(soup)
+    # Step 7: collapse every run of two or more newlines into exactly two,
+    # then decode the XML entities that are still present
+    more_than_2_blank_lines = re.compile(r'\n\n+', re.MULTILINE)
+    text = more_than_2_blank_lines.sub('\n\n', text)
+    text = replace_xml_entities(text)
+    return text
+if __name__ == "__main__":
+    import doctest
+    doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)

sage_sws2rst/results2rst.py ADDED Viewed

@@ -0,0 +1,171 @@
+# -*- coding: utf-8 -*-
+r"""
+Convert output from code cells in the notebook into ReStructuredText
+This is called by sws2rst
+- Pablo Angulo Ardoy (2011-02-25): initial version
+"""
+#**************************************************
+# Copyright (C) 2011 Pablo Angulo
+#
+# Distributed under the terms of the GPL License
+#**************************************************
+import re
+IMAGES_DIR = 'images/'
+#We parse lines one by one but keep track of current scope
+#similarly to worksheet2rst.py
+#Results are split into different types. Some are discarded
+class States(object):
+    # Scopes of the line-by-line result parser; they are the keys of
+    # ResultsParser.transitions.
+    NORMAL = 0      # initial state of ResultsParser.parse
+    HTML = 1        # inside an <html> block that spans several lines
+    MATH = 2        # inside a math <div> that spans several lines
+    TRACEBACK = 3   # after a line starting with "Traceback"
+class LineTypes(object):
+    # Kind of output attached to each transition. ResultsParser.parse puts
+    # PLAIN and HTML lines in one list and all other types in a second
+    # list that is appended after the first.
+    PLAIN = 0
+    IMAGE = 1
+    LATEX = 2
+    HTML  = 3
+    TRACE = 4
+class ResultsParser(object):
+    """Auxiliary class for results2rst
+    ``transitions`` maps each state to an ordered list of tuples
+    ``(regex, replacement, line type, next state)``. For every input line
+    the first tuple of the current state whose regex matches is applied.
+    INPUT:
+    - ``images_dir`` -- string -- prefix inserted before image file names
+    """
+    def __init__(self, images_dir):
+        ##Order matters, place more restrictive regex's before more general ones
+        ##If no regex matches, line will be discarded
+        ##a self transition is needed to produce any output
+        self.transitions = {
+            States.NORMAL:[
+                #IMAGE
+                     (re.compile(r"^\<html\>\<font color='black'\>"
+                                 r"\<img src='cell\://(.*?)'\>"
+                                 r"\</font\>\</html\>"),
+                      "\n.. image:: " + images_dir + "\\1\n    :align: center\n",
+                      LineTypes.IMAGE,
+                      States.NORMAL),
+                #SELF-CONTAINED MATH
+                     (re.compile(r"^\<html\>\<div class=\"math\"\>"
+                                 r"\\newcommand\{\\Bold\}\[1\]\{\\mathbf\{\#1\}\}"
+                                 r"(.*?)\</div\>\</html\>$"),
+                      "\n.. MATH::\n\n    \\1\n",
+                      LineTypes.LATEX,
+                      States.NORMAL),
+                #SELF-CONTAINED MATH - BIS
+                     (re.compile(r"^\<html\>\<div class=\"math\"\>"
+                                 r"(.*?)\</div\>\</html\>$"),
+                      "\n.. MATH::\n\n    \\1",
+                      LineTypes.LATEX,
+                      States.NORMAL),
+                #START Traceback
+                     (re.compile(r"^(Traceback.*)"),
+                      "    Traceback (most recent call last):",
+                      LineTypes.TRACE,
+                      States.TRACEBACK),
+                #START MATH
+                     (re.compile(r"^\<html\>\<div class=\"math\"\>"
+                                 r"\\newcommand\{\\Bold\}\[1\]\{\\mathbf\{\#1\}\}(.*?)"),
+                      "\n.. MATH::\n\n    \\1",
+                      LineTypes.LATEX,
+                      States.MATH),
+                #SELF-CONTAINED HTML
+                     (re.compile(r"^\<html\>.*</html\>$"),
+                      "    <html>...</html>",
+                      LineTypes.HTML,
+                      States.NORMAL),
+                #START HTML
+                     (re.compile(r"^\<html\>.*"),
+                      "    <html>...</html>",
+                      LineTypes.HTML,
+                      States.HTML),
+                #CONTINUE NORMAL
+                     (re.compile("(.*)"),
+                      "    \\1",
+                      LineTypes.PLAIN,
+                      States.NORMAL),
+                ],
+            States.MATH:[
+                 #END MATH
+                     (re.compile(r"(.*?)\</div\>\</html\>$"),
+                      "    \\1",
+                      LineTypes.LATEX,
+                      States.NORMAL),
+                 #CONTINUE MATH
+                     (re.compile("(.*)"),
+                      "    \\1",
+                      LineTypes.LATEX,
+                      States.MATH),
+                ],
+            States.TRACEBACK:[
+                 #END Traceback
+                 #(only a line starting with a non-blank character matches,
+                 #so the indented lines of the traceback are discarded)
+                     (re.compile(r"^(\S.*)"),
+                      "    ...\n    \\1",
+                      LineTypes.TRACE,
+                      States.NORMAL),
+                ],
+            States.HTML:[
+                 #END HTML
+                 #(lines before the closing tag match nothing and are discarded)
+                     (re.compile(r".*</html\>$"),
+                      "",
+                      LineTypes.HTML,
+                      States.NORMAL),
+                ],
+        }
+    def parse(self, text):
+        """Translate ``text`` line by line and return the rst string.
+        Lines of type PLAIN or HTML are collected first, all other lines
+        after them; the two groups are joined with newlines.
+        """
+        result_plain = []
+        result_show = []
+        state = States.NORMAL
+        for line in text.splitlines():
+            for regex, replacement, line_type, new_state in self.transitions[state]:
+                if regex.match(line):
+                    result = result_plain if line_type in (LineTypes.PLAIN, LineTypes.HTML)\
+                             else result_show
+                    # NOTE(review): sub() replaces every match, and "(.*)"
+                    # can match a second time (empty, at the end of the
+                    # line). That presumably explains the trailing spaces
+                    # in the doctest of results2rst -- confirm before
+                    # relying on the exact whitespace.
+                    result.append( regex.sub(replacement, line))
+                    state = new_state
+                    break
+        result_plain.extend(result_show)
+        return '\n'.join(result_plain)
+def results2rst(text, images_dir):
+    r"""Translate the output of evaluated notebook cells into rst
+    compatible with Sage documentation.
+    The text is handed line by line to a ``ResultsParser``. Latex and
+    images are recognized and converted, html output is replaced by a
+    placeholder, and lines that match no known pattern are dropped.
+    INPUT:
+    - ``text`` -- string -- a chunk of HTML text
+    - ``images_dir`` -- string -- folder where images are stored
+    OUTPUT:
+    - string -- rst text
+    EXAMPLES::
+        >>> from sage_sws2rst.results2rst import results2rst
+        >>> s="<html><font color='black'><img src='cell://sage0.png'></font></html>"
+        >>> results2rst(s,'')
+        '\n.. image:: sage0.png\n    :align: center\n'
+        >>> results2rst("4",'')
+        '    4    '
+        >>> s=r'<html><div class="math">\newcommand{\Bold}[1]{\mathbf{#1}}\frac{3}{2}</div></html>'
+        >>> results2rst(s,'')
+        '\n.. MATH::\n\n    \\frac{3}{2}\n'
+    """
+    return ResultsParser(images_dir).parse(text)
+if __name__ == "__main__":
+    import doctest
+    doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)

sage_sws2rst/worksheet2rst.py ADDED Viewed

@@ -0,0 +1,203 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+r"""
+Convert worksheet.html files into ReStructuredText documents
+This is called by 'sage -sws2rst'. Can also be used as a commandline script
+(if BeautifulSoup is installed):
+``python worksheet2rst.py worksheet.html``
+or
+``cat worksheet.html | python worksheet2rst.py``
+AUTHOR:
+- Pablo Angulo Ardoy (2011-02-25): initial version
+The content of worksheet.html is split into comments, code, and output
+(the result of evaluating the code), as follows:
+comments
+{{{id=..|
+code
+///
+results
+}}}
+Each kind of text is dealt with separately.
+"""
+#**************************************************
+# Copyright (C) 2011 Pablo Angulo
+#
+# Distributed under the terms of the GPL License
+#**************************************************
+import sys
+import os
+import re
+from .comments2rst import html2rst
+from .results2rst import results2rst
+import codecs
+#We parse lines one by one but keep track of current scope
+#comments
+#{{{id=..|
+#code
+#///
+#results
+#}}}
+#RESULT_TO_BE_DROPPED corresponds to a results section whose
+#code was empty, and will be discarded, whether it's empty or not
+class States(object):
+    COMMENT = 0
+    CODE = 1
+    RESULT = 2
+    RESULT_TO_BE_DROPPED = 3
+# REs for splitting comments, code and results
+START_CELL_RE = re.compile(r'^\{\{\{id=(\d*)\|')
+END_CODE_RE   = re.compile(r'^\/\/\/')
+END_CELL_RE   = re.compile(r'^\}\}\}')
+#When to switch State, and which State to
+transitions = {
+    States.COMMENT:(
+        START_CELL_RE,
+        States.CODE
+        ),
+    States.CODE:(
+        END_CODE_RE,
+        States.RESULT),
+    States.RESULT:(
+        END_CELL_RE,
+        States.COMMENT),
+    States.RESULT_TO_BE_DROPPED:(
+        END_CELL_RE,
+        States.COMMENT)
+    }
+def code_parser(text):
+    """
+    Arguments:
+    INPUT:
+    - ``s``:sage code, may or may not start with "sage:"
+    OUTPUT:
+    - string -- rst text
+    EXAMPLES (not used for unit test, see
+    http://groups.google.com/group/sage-devel/browse_thread/thread/d82cb049ac102f3a)
+    : from sage_sws2rst.worksheet2rst import code_parser
+    : s="a=2"
+    : code_parser(s)
+    '::\n\n    sage: a=2'
+    : s="def f(n):\n    return n+1\n"
+    : code_parser(s)
+    '::\n\n    sage: def f(n):\n    ....:     return n+1'
+    : s="sage: def f(n):\nsage:     return n+1\n"
+    : code_parser(s)
+    '::\n\n    sage: def f(n):\n    ....:     return n+1'
+    """
+    lines = ['::', '']
+    for s in text.splitlines():
+        l = s[6:] if s.startswith('sage: ') else s
+        if not l: continue
+        prefix = '    ....: ' if l[0] == ' ' else '    sage: '
+        lines.append(prefix + l)
+    return '\n'.join(lines)
+HEADER_RE = re.compile(r'<h\d>')
+def add_title_if_there_is_none(text):
+    if not HEADER_RE.search(text):
+        return '<h1>Please write a title for this worksheet!</h1>\n' + text
+    else:
+        return text
+def worksheet2rst(s, images_dir=''):
+    """Parses a string, tipically the content of the file
+    worksheet.html inside a sws file, and converts it into
+    rst compatible with Sage documentation.
+    INPUT:
+    - ``s`` -- string -- text, tipically the content of
+                               worksheet.html
+    - ``images_dir`` -- string -- folder where images are stored
+    OUTPUT:
+    - string -- rst text
+    EXAMPLES (not used for unit test, see
+    http://groups.google.com/group/sage-devel/browse_thread/thread/d82cb049ac102f3a)
+    : from sage_sws2rst.worksheet2rst import worksheet2rst
+    : worksheet2rst('<p>some text</p>\n{{{id=1|\nprint 2+2\n///\n4\n}}}')
+    u'.. -*- coding: utf-8 -*-\n\nPlease write a title for this worksheet!\n========================================\n\nsome text\n\n\n::\n\n    sage: print 2+2\n    4\n\n.. end of output\n'
+    : s = '{{{id=2|\nshow(f)\n///\n<html><div class="math">\\sqrt{x}</div></html>\n}}}\n'
+    : worksheet2rst(s)
+    u'.. -*- coding: utf-8 -*-\n\nPlease write a title for this worksheet!\n========================================\n::\n\n    sage: show(f)\n\n.. MATH::\n\n    \\sqrt{x}\n\n.. end of output\n'
+    """
+    s = add_title_if_there_is_none(s)
+    state = States.COMMENT
+    result = ['.. -*- coding: utf-8 -*-\n']
+    ls = []
+    for line in s.splitlines():
+        regex, next_state= transitions[state]
+        m = regex.match(line)
+        if m:
+            if state == States.COMMENT:
+                last_cell_id = m.group(1)
+                img_path = images_dir + os.path.sep
+                result.append(html2rst('\n'.join(ls), img_path))
+            elif state == States.RESULT:
+                img_path = os.path.join(images_dir, 'cell_%s_' % last_cell_id)
+                result.append(results2rst('\n'.join(ls),
+                                             img_path))
+                result.append('')
+                result.append('.. end of output')
+            elif state == States.CODE:
+                if ls and any(ls):
+                    result.append(code_parser('\n'.join(ls)))
+                else:
+                    next_state = States.RESULT_TO_BE_DROPPED
+            ls = []
+            state = next_state
+        else:
+            ls.append(line)
+    if state == States.COMMENT:
+        img_path = images_dir + os.path.sep
+        result.append(html2rst('\n'.join(ls), img_path))
+    elif state == States.RESULT:
+        img_path = os.path.join(images_dir, 'cell_%s_' % last_cell_id)
+        result.append(result_parser('\n'.join(ls),
+                                     img_path))
+        result.append('')
+        result.append('.. end of output')
+    elif state == States.CODE:
+        result.append(code_parser('\n'.join(ls)))
+    return '\n'.join(result)
+if __name__=='__main__':
+    if len(sys.argv)>1:
+        fichero = codecs.open(sys.argv[1], mode='r', encoding='utf-8')
+        text = fichero.read()
+        fichero.close()
+    else:
+        text = sys.stdin.read()
+    images_dir = sys.argv[2] if len(sys.argv)>2 else ''
+    print((worksheet2rst(text, images_dir).encode('utf-8')))