RubyGems - ots - Versions diffs - 0.4.3 → 0.4.4 - Mend

ots 0.4.3 → 0.4.4

Files changed (64) hide show

data/ext/article.c ADDED

@@ -0,0 +1,119 @@
+/*
+ *  article.c
+ *
+ *  Copyright (C) 2003 Nadav Rotem <nadav256@hotmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Library General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "libots.h"
+#include "grader-tc.h"
+extern void ots_free_TF_wordlist (GList * aList);
+#define MAX_WORD_LENGTH 35
+/*Data structure related functions*/
+OtsSentence *
+ots_new_sentence (void)
+{
+  OtsSentence *aLine = g_new0 (OtsSentence, 1);
+  aLine->words = NULL;
+  aLine->wc = 0;
+  aLine->selected = 0;
+  aLine->score = 0;
+  return aLine;
+}
+void
+ots_free_sentence (OtsSentence * sen)
+{
+  if (sen != NULL)
+    {
+      g_list_foreach (sen->words, (GFunc) g_free, NULL);
+      g_list_free (sen->words);
+      g_free (sen);
+    }
+sen=NULL;
+}
+OtsArticle *
+ots_new_article (void)
+{
+  OtsArticle *Doc;
+  Doc = g_new0 (OtsArticle, 1);
+  Doc->lineCount = 0;
+  Doc->title = NULL;
+  Doc->stem=new_stem_rule ();
+  Doc->lines=NULL;
+  Doc->dict = NULL;
+  Doc->ImpWords = NULL;
+  Doc->wordStat = NULL;
+  Doc->tf_terms=NULL;
+  return Doc;
+}
+void
+ots_free_article (OtsArticle * art)
+{
+  if (NULL != art)
+    {
+      free_stem_rule (art->stem);
+  	   ots_free_wordlist (art->dict);
+  	   ots_free_wordlist (art->ImpWords);
+	   ots_free_wordlist (art->wordStat);
+      ots_free_TF_wordlist(art->tf_terms);
+	   g_list_foreach (art->lines, (GFunc) ots_free_sentence, NULL);
+      g_list_free (art->lines);
+      if (art->title != NULL) g_free (art->title);
+      g_free (art);
+    }
+art=NULL;
+}
+OtsSentence *
+ots_append_line (OtsArticle * Doc)
+{
+  OtsSentence *aLine = ots_new_sentence ();
+  Doc->lineCount++;
+  Doc->lines = g_list_append (Doc->lines, aLine);
+  return aLine;
+}
+void
+ots_append_word (OtsSentence * aLine,unsigned const char *aWord)
+{
+  if ((aWord == NULL) || (0==strlen(aWord)) ||(NULL==aLine)) return;
+  aLine->wc++;
+  aLine->words = g_list_append (aLine->words, (gpointer) g_strdup (aWord));
+  return;
+}
+gboolean
+ots_is_line_selected(const OtsSentence *aLine)
+{
+  if (aLine==NULL) {printf("Warning:Line=NULL\n"); return FALSE;}
+  return (aLine->selected);
+}

data/ext/dictionary.c ADDED

@@ -0,0 +1,335 @@
+/*
+ *  dictionary.c
+ *
+ *  Copyright (C) 2003 Nadav Rotem <nadav256@hotmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Library General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "libots.h"
+#include "grader-tc.h"
+#include <libxml/xmlmemory.h>
+#include <libxml/parser.h>
+/* loads the xml dictionary file to memory*/
+gboolean
+ots_load_xml_dictionary (OtsArticle * Doc, const char *name)
+{
+  xmlDocPtr doc=NULL;
+  xmlNodePtr head=NULL;
+  xmlNodePtr stem=NULL;
+  xmlNodePtr pre=NULL;
+  xmlNodePtr post=NULL;
+  xmlNodePtr syno=NULL;	    		/* synonyms     */
+  xmlNodePtr manual=NULL;	   	/* manual  */
+  xmlNodePtr step1_pre=NULL;		/* step1  */
+  xmlNodePtr step1_post=NULL;		/* step1  */
+  xmlNodePtr parse=NULL;	   	/* parser rules */
+  xmlNodePtr pbreak=NULL;
+  xmlNodePtr pdbreak=NULL;
+  xmlNodePtr tc_words=NULL;	   	/* term count dictionary   */
+  xmlNodePtr tf_words=NULL;	   	/* term frequency dictionary   */
+  OtsStemRule * rule=Doc->stem;
+  char *dict_name;
+  char *local_dict_name;
+  dict_name = g_strdup_printf ("%s%s.xml", DICTIONARY_DIR, name);
+  local_dict_name = g_strdup_printf ("%s.xml", name);
+	if (g_file_test(local_dict_name,G_FILE_TEST_EXISTS))
+		  doc = xmlParseFile (local_dict_name); /* it warns to the screen so we cant use it; enable for web services only */
+  if (doc == NULL)   doc = xmlParseFile (dict_name);
+  if (doc == NULL) return (FALSE);
+  head = xmlDocGetRootElement (doc);
+  if (head == NULL)
+    {
+      fprintf (stderr, "empty document\n");
+      xmlFreeDoc (doc);
+      return (FALSE);
+    }
+  if (xmlStrcmp (head->name, (const xmlChar *) "dictionary"))
+    {
+      fprintf (stderr, "%s", head->name);
+      xmlFreeDoc (doc);
+      return (FALSE);
+    }
+  if (head != NULL)
+    stem = head->xmlChildrenNode;
+  while ((stem != NULL)
+	 && (xmlStrcmp (stem->name, (const xmlChar *) "stemmer")))
+    {
+      stem = stem->next;
+    }
+  if (head != NULL)
+    parse = head->xmlChildrenNode;
+  while ((parse != NULL)
+	 && (xmlStrcmp (parse->name, (const xmlChar *) "parser")))
+    {
+      parse = parse->next;
+    }
+  if (head != NULL)
+    tc_words = head->xmlChildrenNode;
+  while ((tc_words != NULL)
+	 && (xmlStrcmp (tc_words->name, (const xmlChar *) "grader-tc")))
+    {
+      tc_words = tc_words->next;
+    }
+  if (head != NULL)
+    tf_words = head->xmlChildrenNode;
+  while ((tf_words != NULL)
+	 && (xmlStrcmp (tf_words->name, (const xmlChar *) "grader-tf")))
+    {
+      tf_words = tf_words->next;
+    }
+  if (stem != NULL)
+    pre = stem->xmlChildrenNode;
+  while ((pre != NULL) && (xmlStrcmp (pre->name, (const xmlChar *) "pre")))
+    {
+      pre = pre->next;
+    }
+  if (stem != NULL)
+    post = stem->xmlChildrenNode;
+  while ((post != NULL) && (xmlStrcmp (post->name, (const xmlChar *) "post")))
+    {
+      post = post->next;
+    }
+  if (stem != NULL)
+    syno = stem->xmlChildrenNode;
+  while ((syno != NULL)
+	 && (xmlStrcmp (syno->name, (const xmlChar *) "synonyms")))
+    {
+      syno = syno->next;
+    }
+  if (stem != NULL)
+    manual = stem->xmlChildrenNode;
+  while ((manual != NULL)
+	 && (xmlStrcmp (manual->name, (const xmlChar *) "manual")))
+    {
+      manual = manual->next;
+    }
+  if (stem != NULL)
+    step1_pre = stem->xmlChildrenNode;
+  while ((step1_pre != NULL)
+	 && (xmlStrcmp (step1_pre->name, (const xmlChar *) "step1_pre")))
+    {
+      step1_pre = step1_pre->next;
+    }
+  if (stem != NULL)
+    step1_post = stem->xmlChildrenNode;
+  while ((step1_post != NULL)
+	 && (xmlStrcmp (step1_post->name, (const xmlChar *) "step1_post")))
+    {
+      step1_post = step1_post->next;
+    }
+  if (pre != NULL)
+    pre = pre->xmlChildrenNode;	/*point to first word */
+  while (pre != NULL)
+    {
+      if (0 == xmlStrcmp (pre->name, (const xmlChar *) "rule"))
+	rule->RemovePre =
+	  g_list_append (rule->RemovePre,
+			 (xmlNodeListGetString
+				 (doc, pre->xmlChildrenNode, 1)));
+      pre = pre->next;
+    }
+  if (post != NULL)
+    post = post->xmlChildrenNode;
+  while (post != NULL)
+    {
+      if (0 == xmlStrcmp (post->name, (const xmlChar *) "rule"))
+	rule->RemovePost =
+	  g_list_append (rule->RemovePost,
+			       (xmlNodeListGetString
+				 (doc, post->xmlChildrenNode, 1)));
+      post = post->next;
+    }
+  if (syno != NULL)
+    syno = syno->xmlChildrenNode;
+  while (syno != NULL)
+    {
+      if (0 == xmlStrcmp (syno->name, (const xmlChar *) "rule"))
+	rule->synonyms =
+	  g_list_append (rule->synonyms,
+			     (xmlNodeListGetString
+				 (doc, syno->xmlChildrenNode, 1)));
+      syno = syno->next;
+    }
+  if (manual != NULL)
+    manual = manual->xmlChildrenNode;
+  while (manual != NULL)
+    {
+      if (0 == xmlStrcmp (manual->name, (const xmlChar *) "rule"))
+	rule->manual =
+	  g_list_append (rule->manual,
+			      (xmlNodeListGetString
+				 (doc, manual->xmlChildrenNode, 1)));
+      manual = manual->next;
+    }
+ if (step1_pre != NULL)
+    step1_pre = step1_pre->xmlChildrenNode;
+  while (step1_pre != NULL)
+    {
+      if (0 == xmlStrcmp (step1_pre->name, (const xmlChar *) "rule"))
+	rule->step1_pre =
+	  g_list_append (rule->step1_pre,
+			      (xmlNodeListGetString
+				 (doc, step1_pre->xmlChildrenNode, 1)));
+      step1_pre = step1_pre->next;
+    }
+ if (step1_post != NULL)
+    step1_post = step1_post->xmlChildrenNode;
+  while (step1_post != NULL)
+    {
+      if (0 == xmlStrcmp (step1_post->name, (const xmlChar *) "rule"))
+	rule->step1_post =
+	  g_list_append (rule->step1_post,
+			        (xmlNodeListGetString
+				 (doc, step1_post->xmlChildrenNode, 1)));
+      step1_post = step1_post->next;
+    }
+ if (parse != NULL)
+    pbreak = parse->xmlChildrenNode;
+  while ((pbreak != NULL) && (xmlStrcmp (pbreak->name, (const xmlChar *) "linebreak")))
+    {
+      pbreak = pbreak->next;
+    }
+ if (parse != NULL)
+    pdbreak = parse->xmlChildrenNode;
+  while ((pdbreak != NULL) && (xmlStrcmp (pdbreak->name, (const xmlChar *) "linedontbreak")))
+    {
+      pdbreak = pdbreak->next;
+    }
+  /*Parser break*/
+  if (pbreak != NULL)
+    pbreak = pbreak->xmlChildrenNode;
+  while (pbreak != NULL)
+    {
+      if (0 == xmlStrcmp (pbreak->name, (const xmlChar *) "rule"))
+	rule->ParserBreak =
+	  g_list_append (rule->ParserBreak,
+			        (xmlNodeListGetString
+				 (doc, pbreak->xmlChildrenNode, 1)));
+      pbreak = pbreak->next;
+    }
+  /*Parser Don't break*/
+  if (pdbreak != NULL)
+    pdbreak = pdbreak->xmlChildrenNode;
+  while (pdbreak != NULL)
+    {
+      if (0 == xmlStrcmp (pdbreak->name, (const xmlChar *) "rule"))
+	rule->ParserDontBreak =
+	  g_list_append (rule->ParserDontBreak,
+			        (xmlNodeListGetString
+				 (doc, pdbreak->xmlChildrenNode, 1)));
+      pdbreak = pdbreak->next;
+    }
+  /*Term Count load dict*/
+  if (tc_words != NULL)
+    tc_words = tc_words->xmlChildrenNode;
+  while (tc_words != NULL)
+    {
+      if (0 == xmlStrcmp (tc_words->name, (const xmlChar *) "word"))
+	{
+		xmlChar *key;
+		key=xmlNodeListGetString(doc, tc_words->xmlChildrenNode,1);
+	   Doc->dict = g_list_append (Doc->dict,(gpointer)ots_new_wordEntery(key));
+      xmlFree(key);
+   }
+      tc_words = tc_words->next;
+    }
+  /*Term Frequency load dict*/
+  if (tf_words != NULL)
+    tf_words = tf_words->xmlChildrenNode;
+  while (tf_words != NULL)
+    {
+      if (0 == xmlStrcmp (tf_words->name, (const xmlChar *) "word"))
+	{
+		xmlChar *key;
+		xmlChar *idf_key;
+		key=xmlNodeListGetString(doc, tf_words->xmlChildrenNode,1);
+	   idf_key=xmlGetProp(tf_words,"idf");
+	   Doc->tf_terms = g_list_append (Doc->tf_terms,ots_new_OtsWordTF(key,atof(idf_key)));
+      xmlFree(key);
+      xmlFree(idf_key);
+   }
+      tf_words = tf_words->next;
+    }
+  xmlFreeDoc(doc);
+  //xmlCleanupParser ();
+  g_free(dict_name);
+  g_free(local_dict_name);
+  return (TRUE);
+}